[official-gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "input.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "tree.h"
31 #include "fold-const.h"
32 #include "stor-layout.h"
33 #include "target.h"
34 #include "predict.h"
35 #include "hard-reg-set.h"
36 #include "function.h"
37 #include "dominance.h"
38 #include "cfg.h"
39 #include "basic-block.h"
40 #include "gimple-pretty-print.h"
41 #include "tree-ssa-alias.h"
42 #include "internal-fn.h"
43 #include "tree-eh.h"
44 #include "gimple-expr.h"
45 #include "is-a.h"
46 #include "gimple.h"
47 #include "gimplify.h"
48 #include "gimple-iterator.h"
49 #include "gimplify-me.h"
50 #include "gimple-ssa.h"
51 #include "tree-cfg.h"
52 #include "tree-phinodes.h"
53 #include "ssa-iterators.h"
54 #include "stringpool.h"
55 #include "tree-ssanames.h"
56 #include "tree-ssa-loop-manip.h"
57 #include "cfgloop.h"
58 #include "tree-ssa-loop.h"
59 #include "tree-scalar-evolution.h"
60 #include "rtl.h"
61 #include "flags.h"
62 #include "insn-config.h"
63 #include "expmed.h"
64 #include "dojump.h"
65 #include "explow.h"
66 #include "calls.h"
67 #include "emit-rtl.h"
68 #include "varasm.h"
69 #include "stmt.h"
70 #include "expr.h"
71 #include "recog.h" /* FIXME: for insn_data */
72 #include "insn-codes.h"
73 #include "optabs.h"
74 #include "diagnostic-core.h"
75 #include "tree-vectorizer.h"
76 #include "plugin-api.h"
77 #include "ipa-ref.h"
78 #include "cgraph.h"
79 #include "builtins.h"
81 /* For lang_hooks.types.type_for_mode. */
82 #include "langhooks.h"
84 /* Return the vectorized type for the given statement. */
86 tree
87 stmt_vectype (struct _stmt_vec_info *stmt_info)
89 return STMT_VINFO_VECTYPE (stmt_info);
92 /* Return TRUE iff the given statement is in an inner loop relative to
93 the loop being vectorized. */
94 bool
95 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
97 gimple stmt = STMT_VINFO_STMT (stmt_info);
98 basic_block bb = gimple_bb (stmt);
99 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
100 struct loop* loop;
102 if (!loop_vinfo)
103 return false;
105 loop = LOOP_VINFO_LOOP (loop_vinfo);
107 return (bb->loop_father == loop->inner);
110 /* Record the cost of a statement, either by directly informing the
111 target model or by saving it in a vector for later processing.
112 Return a preliminary estimate of the statement's cost. */
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 int misalign, enum vect_cost_model_location where)
119 if (body_cost_vec)
121 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
122 add_stmt_info_to_vec (body_cost_vec, count, kind,
123 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
124 misalign);
125 return (unsigned)
126 (builtin_vectorization_cost (kind, vectype, misalign) * count);
129 else
131 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
132 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
133 void *target_cost_data;
135 if (loop_vinfo)
136 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
137 else
138 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
140 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
141 misalign, where);
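/* An illustrative sketch of the two modes (based on the calls made later in
   this file): with a cost vector the caller typically records

     inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                     stmt_info, 0, vect_body);

   and constant/invariant operands get a separate vect_prologue entry, while
   with BODY_COST_VEC == NULL the cost goes straight to the target via
   add_stmt_cost.  */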
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT and the vector is associated
157 with scalar destination SCALAR_DEST. */
159 static tree
160 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
161 tree array, unsigned HOST_WIDE_INT n)
163 tree vect_type, vect, vect_name, array_ref;
164 gimple new_stmt;
166 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
167 vect_type = TREE_TYPE (TREE_TYPE (array));
168 vect = vect_create_destination_var (scalar_dest, vect_type);
169 array_ref = build4 (ARRAY_REF, vect_type, array,
170 build_int_cst (size_type_node, n),
171 NULL_TREE, NULL_TREE);
173 new_stmt = gimple_build_assign (vect, array_ref);
174 vect_name = make_ssa_name (vect, new_stmt);
175 gimple_assign_set_lhs (new_stmt, vect_name);
176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
178 return vect_name;
181 /* ARRAY is an array of vectors created by create_vector_array.
182 Emit code to store SSA_NAME VECT in index N of the array.
183 The store is part of the vectorization of STMT. */
185 static void
186 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
187 tree array, unsigned HOST_WIDE_INT n)
189 tree array_ref;
190 gimple new_stmt;
192 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
193 build_int_cst (size_type_node, n),
194 NULL_TREE, NULL_TREE);
196 new_stmt = gimple_build_assign (array_ref, vect);
197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
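/* Illustrative sketch (the variable names are made up): for a two-vector
   array these helpers emit plain ARRAY_REF assignments such as

     vect_array[0] = vx.0;        write_vector_array
     vect_array[1] = vx.1;
     ...
     vect__3 = vect_array[0];     read_vector_array

   into and out of the temporary built by create_vector_array.  */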
200 /* PTR is a pointer to an array of type TYPE. Return a representation
201 of *PTR. The memory reference replaces those in FIRST_DR
202 (and its group). */
204 static tree
205 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
207 tree mem_ref, alias_ptr_type;
209 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
210 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
211 /* Arrays have the same alignment as their type. */
212 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
213 return mem_ref;
216 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
218 /* Function vect_mark_relevant.
220 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
222 static void
223 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
224 enum vect_relevant relevant, bool live_p,
225 bool used_in_pattern)
227 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
228 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
229 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
230 gimple pattern_stmt;
232 if (dump_enabled_p ())
233 dump_printf_loc (MSG_NOTE, vect_location,
234 "mark relevant %d, live %d.\n", relevant, live_p);
236 /* If this stmt is an original stmt in a pattern, we might need to mark its
237 related pattern stmt instead of the original stmt. However, such stmts
238 may have their own uses that are not in any pattern; in such cases the
239 stmt itself should be marked. */
240 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
242 bool found = false;
243 if (!used_in_pattern)
245 imm_use_iterator imm_iter;
246 use_operand_p use_p;
247 gimple use_stmt;
248 tree lhs;
249 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
250 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
252 if (is_gimple_assign (stmt))
253 lhs = gimple_assign_lhs (stmt);
254 else
255 lhs = gimple_call_lhs (stmt);
257 /* This use is a non-pattern use; if LHS has other uses that are
258 pattern uses, we should mark the stmt itself, and not the pattern
259 stmt. */
260 if (lhs && TREE_CODE (lhs) == SSA_NAME)
261 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
263 if (is_gimple_debug (USE_STMT (use_p)))
264 continue;
265 use_stmt = USE_STMT (use_p);
267 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
268 continue;
270 if (vinfo_for_stmt (use_stmt)
271 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
273 found = true;
274 break;
279 if (!found)
281 /* This is the last stmt in a sequence that was detected as a
282 pattern that can potentially be vectorized. Don't mark the stmt
283 as relevant/live because it's not going to be vectorized.
284 Instead mark the pattern-stmt that replaces it. */
286 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
288 if (dump_enabled_p ())
289 dump_printf_loc (MSG_NOTE, vect_location,
290 "last stmt in pattern. don't mark"
291 " relevant/live.\n");
292 stmt_info = vinfo_for_stmt (pattern_stmt);
293 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
294 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
295 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
296 stmt = pattern_stmt;
300 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
301 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
302 STMT_VINFO_RELEVANT (stmt_info) = relevant;
304 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
305 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_NOTE, vect_location,
309 "already marked relevant/live.\n");
310 return;
313 worklist->safe_push (stmt);
317 /* Function vect_stmt_relevant_p.
319 Return true if STMT in loop that is represented by LOOP_VINFO is
320 "relevant for vectorization".
322 A stmt is considered "relevant for vectorization" if:
323 - it has uses outside the loop.
324 - it has vdefs (it alters memory).
325 - it is a control stmt in the loop (except for the loop exit condition).
327 CHECKME: what other side effects would the vectorizer allow? */
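/* For example, in the illustrative scalar loop below S1 is relevant because
   it has a vdef (it stores to memory) and S2 is relevant/live because its
   result is used after the loop:

     sum = 0;
     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     S1: alters memory
         sum = sum + b[i];    S2: used outside the loop
       }
     ... = sum;  */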
329 static bool
330 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
331 enum vect_relevant *relevant, bool *live_p)
333 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
334 ssa_op_iter op_iter;
335 imm_use_iterator imm_iter;
336 use_operand_p use_p;
337 def_operand_p def_p;
339 *relevant = vect_unused_in_scope;
340 *live_p = false;
342 /* cond stmt other than loop exit cond. */
343 if (is_ctrl_stmt (stmt)
344 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
345 != loop_exit_ctrl_vec_info_type)
346 *relevant = vect_used_in_scope;
348 /* changing memory. */
349 if (gimple_code (stmt) != GIMPLE_PHI)
350 if (gimple_vdef (stmt)
351 && !gimple_clobber_p (stmt))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "vec_stmt_relevant_p: stmt has vdefs.\n");
356 *relevant = vect_used_in_scope;
359 /* uses outside the loop. */
360 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
362 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
364 basic_block bb = gimple_bb (USE_STMT (use_p));
365 if (!flow_bb_inside_loop_p (loop, bb))
367 if (dump_enabled_p ())
368 dump_printf_loc (MSG_NOTE, vect_location,
369 "vec_stmt_relevant_p: used out of loop.\n");
371 if (is_gimple_debug (USE_STMT (use_p)))
372 continue;
374 /* We expect all such uses to be in the loop exit phis
375 (because of loop-closed SSA form). */
376 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
377 gcc_assert (bb == single_exit (loop)->dest);
379 *live_p = true;
384 return (*live_p || *relevant);
388 /* Function exist_non_indexing_operands_for_use_p
390 USE is one of the uses attached to STMT. Check if USE is
391 used in STMT for anything other than indexing an array. */
393 static bool
394 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
396 tree operand;
397 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
399 /* USE corresponds to some operand in STMT. If there is no data
400 reference in STMT, then any operand that corresponds to USE
401 is not indexing an array. */
402 if (!STMT_VINFO_DATA_REF (stmt_info))
403 return true;
405 /* STMT has a data_ref. FORNOW this means that it is of one of
406 the following forms:
407 -1- ARRAY_REF = var
408 -2- var = ARRAY_REF
409 (This should have been verified in analyze_data_refs).
411 'var' in the second case corresponds to a def, not a use,
412 so USE cannot correspond to any operands that are not used
413 for array indexing.
415 Therefore, all we need to check is if STMT falls into the
416 first case, and whether var corresponds to USE. */
418 if (!gimple_assign_copy_p (stmt))
420 if (is_gimple_call (stmt)
421 && gimple_call_internal_p (stmt))
422 switch (gimple_call_internal_fn (stmt))
424 case IFN_MASK_STORE:
425 operand = gimple_call_arg (stmt, 3);
426 if (operand == use)
427 return true;
428 /* FALLTHRU */
429 case IFN_MASK_LOAD:
430 operand = gimple_call_arg (stmt, 2);
431 if (operand == use)
432 return true;
433 break;
434 default:
435 break;
437 return false;
440 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
441 return false;
442 operand = gimple_assign_rhs1 (stmt);
443 if (TREE_CODE (operand) != SSA_NAME)
444 return false;
446 if (operand == use)
447 return true;
449 return false;
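/* For example (with illustrative names): in the store "a[i_4] = x_5" the use
   of i_4 only feeds the array index, so this function returns false for it
   and process_use will not mark its def as relevant, whereas it returns true
   for x_5, which is the stored value and must be vectorized itself.  */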
454 /* Function process_use.
456 Inputs:
457 - a USE in STMT in a loop represented by LOOP_VINFO
458 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
459 that defined USE. This is done by calling mark_relevant and passing it
460 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
461 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
462 be performed.
464 Outputs:
465 Generally, LIVE_P and RELEVANT are used to define the liveness and
466 relevance info of the DEF_STMT of this USE:
467 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
468 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
469 Exceptions:
470 - case 1: If USE is used only for address computations (e.g. array indexing),
471 which does not need to be directly vectorized, then the liveness/relevance
472 of the respective DEF_STMT is left unchanged.
473 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
474 skip DEF_STMT because it has already been processed.
475 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
476 be modified accordingly.
478 Return true if everything is as expected. Return false otherwise. */
480 static bool
481 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
482 enum vect_relevant relevant, vec<gimple> *worklist,
483 bool force)
485 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
486 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
487 stmt_vec_info dstmt_vinfo;
488 basic_block bb, def_bb;
489 tree def;
490 gimple def_stmt;
491 enum vect_def_type dt;
493 /* case 1: we are only interested in uses that need to be vectorized. Uses
494 that are used for address computation are not considered relevant. */
495 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
496 return true;
498 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
502 "not vectorized: unsupported use in stmt.\n");
503 return false;
506 if (!def_stmt || gimple_nop_p (def_stmt))
507 return true;
509 def_bb = gimple_bb (def_stmt);
510 if (!flow_bb_inside_loop_p (loop, def_bb))
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
514 return true;
517 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
518 DEF_STMT must have already been processed, because this should be the
519 only way that STMT, which is a reduction-phi, was put in the worklist,
520 as there should be no other uses for DEF_STMT in the loop. So we just
521 check that everything is as expected, and we are done. */
522 dstmt_vinfo = vinfo_for_stmt (def_stmt);
523 bb = gimple_bb (stmt);
524 if (gimple_code (stmt) == GIMPLE_PHI
525 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
526 && gimple_code (def_stmt) != GIMPLE_PHI
527 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
528 && bb->loop_father == def_bb->loop_father)
530 if (dump_enabled_p ())
531 dump_printf_loc (MSG_NOTE, vect_location,
532 "reduc-stmt defining reduc-phi in the same nest.\n");
533 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
534 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
535 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
536 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
537 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
538 return true;
541 /* case 3a: outer-loop stmt defining an inner-loop stmt:
542 outer-loop-header-bb:
543 d = def_stmt
544 inner-loop:
545 stmt # use (d)
546 outer-loop-tail-bb:
547 ... */
548 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
550 if (dump_enabled_p ())
551 dump_printf_loc (MSG_NOTE, vect_location,
552 "outer-loop def-stmt defining inner-loop stmt.\n");
554 switch (relevant)
556 case vect_unused_in_scope:
557 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
558 vect_used_in_scope : vect_unused_in_scope;
559 break;
561 case vect_used_in_outer_by_reduction:
562 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
563 relevant = vect_used_by_reduction;
564 break;
566 case vect_used_in_outer:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_in_scope;
569 break;
571 case vect_used_in_scope:
572 break;
574 default:
575 gcc_unreachable ();
579 /* case 3b: inner-loop stmt defining an outer-loop stmt:
580 outer-loop-header-bb:
582 inner-loop:
583 d = def_stmt
584 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
585 stmt # use (d) */
586 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
588 if (dump_enabled_p ())
589 dump_printf_loc (MSG_NOTE, vect_location,
590 "inner-loop def-stmt defining outer-loop stmt.\n");
592 switch (relevant)
594 case vect_unused_in_scope:
595 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
596 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
597 vect_used_in_outer_by_reduction : vect_unused_in_scope;
598 break;
600 case vect_used_by_reduction:
601 relevant = vect_used_in_outer_by_reduction;
602 break;
604 case vect_used_in_scope:
605 relevant = vect_used_in_outer;
606 break;
608 default:
609 gcc_unreachable ();
613 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
614 is_pattern_stmt_p (stmt_vinfo));
615 return true;
619 /* Function vect_mark_stmts_to_be_vectorized.
621 Not all stmts in the loop need to be vectorized. For example:
623 for i...
624 for j...
625 1. T0 = i + j
626 2. T1 = a[T0]
628 3. j = j + 1
630 Stmts 1 and 3 do not need to be vectorized, because loop control and
631 addressing of vectorized data-refs are handled differently.
633 This pass detects such stmts. */
635 bool
636 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
638 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
639 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
640 unsigned int nbbs = loop->num_nodes;
641 gimple_stmt_iterator si;
642 gimple stmt;
643 unsigned int i;
644 stmt_vec_info stmt_vinfo;
645 basic_block bb;
646 gimple phi;
647 bool live_p;
648 enum vect_relevant relevant, tmp_relevant;
649 enum vect_def_type def_type;
651 if (dump_enabled_p ())
652 dump_printf_loc (MSG_NOTE, vect_location,
653 "=== vect_mark_stmts_to_be_vectorized ===\n");
655 auto_vec<gimple, 64> worklist;
657 /* 1. Init worklist. */
658 for (i = 0; i < nbbs; i++)
660 bb = bbs[i];
661 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
663 phi = gsi_stmt (si);
664 if (dump_enabled_p ())
666 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
667 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
670 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
671 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
673 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
675 stmt = gsi_stmt (si);
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
679 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
682 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
683 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
687 /* 2. Process_worklist */
688 while (worklist.length () > 0)
690 use_operand_p use_p;
691 ssa_op_iter iter;
693 stmt = worklist.pop ();
694 if (dump_enabled_p ())
696 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
697 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
700 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
701 (DEF_STMT) as relevant/irrelevant and live/dead according to the
702 liveness and relevance properties of STMT. */
703 stmt_vinfo = vinfo_for_stmt (stmt);
704 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
705 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
707 /* Generally, the liveness and relevance properties of STMT are
708 propagated as is to the DEF_STMTs of its USEs:
709 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
710 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
712 One exception is when STMT has been identified as defining a reduction
713 variable; in this case we set the liveness/relevance as follows:
714 live_p = false
715 relevant = vect_used_by_reduction
716 This is because we distinguish between two kinds of relevant stmts -
717 those that are used by a reduction computation, and those that are
718 (also) used by a regular computation. This allows us later on to
719 identify stmts that are used solely by a reduction, and therefore the
720 order of the results that they produce does not have to be kept. */
722 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
723 tmp_relevant = relevant;
724 switch (def_type)
726 case vect_reduction_def:
727 switch (tmp_relevant)
729 case vect_unused_in_scope:
730 relevant = vect_used_by_reduction;
731 break;
733 case vect_used_by_reduction:
734 if (gimple_code (stmt) == GIMPLE_PHI)
735 break;
736 /* fall through */
738 default:
739 if (dump_enabled_p ())
740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
741 "unsupported use of reduction.\n");
742 return false;
745 live_p = false;
746 break;
748 case vect_nested_cycle:
749 if (tmp_relevant != vect_unused_in_scope
750 && tmp_relevant != vect_used_in_outer_by_reduction
751 && tmp_relevant != vect_used_in_outer)
753 if (dump_enabled_p ())
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
755 "unsupported use of nested cycle.\n");
757 return false;
760 live_p = false;
761 break;
763 case vect_double_reduction_def:
764 if (tmp_relevant != vect_unused_in_scope
765 && tmp_relevant != vect_used_by_reduction)
767 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
769 "unsupported use of double reduction.\n");
771 return false;
774 live_p = false;
775 break;
777 default:
778 break;
781 if (is_pattern_stmt_p (stmt_vinfo))
783 /* Pattern statements are not inserted into the code, so
784 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
785 have to scan the RHS or function arguments instead. */
786 if (is_gimple_assign (stmt))
788 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
789 tree op = gimple_assign_rhs1 (stmt);
791 i = 1;
792 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
794 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
795 live_p, relevant, &worklist, false)
796 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
797 live_p, relevant, &worklist, false))
798 return false;
799 i = 2;
801 for (; i < gimple_num_ops (stmt); i++)
803 op = gimple_op (stmt, i);
804 if (TREE_CODE (op) == SSA_NAME
805 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
806 &worklist, false))
807 return false;
810 else if (is_gimple_call (stmt))
812 for (i = 0; i < gimple_call_num_args (stmt); i++)
814 tree arg = gimple_call_arg (stmt, i);
815 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
816 &worklist, false))
817 return false;
821 else
822 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
824 tree op = USE_FROM_PTR (use_p);
825 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
826 &worklist, false))
827 return false;
830 if (STMT_VINFO_GATHER_P (stmt_vinfo))
832 tree off;
833 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
834 gcc_assert (decl);
835 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
836 &worklist, true))
837 return false;
839 } /* while worklist */
841 return true;
845 /* Function vect_model_simple_cost.
847 Models cost for simple operations, i.e. those that only emit ncopies of a
848 single op. Right now, this does not account for multiple insns that could
849 be generated for the single vector op. We will handle that shortly. */
851 void
852 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
853 enum vect_def_type *dt,
854 stmt_vector_for_cost *prologue_cost_vec,
855 stmt_vector_for_cost *body_cost_vec)
857 int i;
858 int inside_cost = 0, prologue_cost = 0;
860 /* The SLP costs were already calculated during SLP tree build. */
861 if (PURE_SLP_STMT (stmt_info))
862 return;
864 /* FORNOW: Assuming a maximum of 2 args per stmt. */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
870 /* Pass the inside-of-loop statements to the target-specific cost model. */
871 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
872 stmt_info, 0, vect_body);
874 if (dump_enabled_p ())
875 dump_printf_loc (MSG_NOTE, vect_location,
876 "vect_model_simple_cost: inside_cost = %d, "
877 "prologue_cost = %d .\n", inside_cost, prologue_cost);
881 /* Model cost for type demotion and promotion operations. PWR is normally
882 zero for single-step promotions and demotions. It will be one if
883 two-step promotion/demotion is required, and so on. Each additional
884 step doubles the number of instructions required. */
886 static void
887 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
888 enum vect_def_type *dt, int pwr)
890 int i, tmp;
891 int inside_cost = 0, prologue_cost = 0;
892 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
893 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
894 void *target_cost_data;
896 /* The SLP costs were already calculated during SLP tree build. */
897 if (PURE_SLP_STMT (stmt_info))
898 return;
900 if (loop_vinfo)
901 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
902 else
903 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
905 for (i = 0; i < pwr + 1; i++)
907 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
908 (i + 1) : i;
909 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
910 vec_promote_demote, stmt_info, 0,
911 vect_body);
914 /* FORNOW: Assuming a maximum of 2 args per stmt. */
915 for (i = 0; i < 2; i++)
916 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
917 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
918 stmt_info, 0, vect_prologue);
920 if (dump_enabled_p ())
921 dump_printf_loc (MSG_NOTE, vect_location,
922 "vect_model_promotion_demotion_cost: inside_cost = %d, "
923 "prologue_cost = %d .\n", inside_cost, prologue_cost);
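/* Worked example (assuming vect_pow2 (x) returns 1 << x): a two-step
   promotion has PWR == 1, so the loop above accounts for
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote stmts,
   while a two-step demotion (also PWR == 1) accounts for
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3.  */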
926 /* Function vect_cost_group_size
928 For grouped load or store, return the group_size only if it is the first
929 load or store of a group, else return 1. This ensures that group size is
930 only returned once per group. */
932 static int
933 vect_cost_group_size (stmt_vec_info stmt_info)
935 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
937 if (first_stmt == STMT_VINFO_STMT (stmt_info))
938 return GROUP_SIZE (stmt_info);
940 return 1;
944 /* Function vect_model_store_cost
946 Models cost for stores. In the case of grouped accesses, one access
947 has the overhead of the grouped access attributed to it. */
949 void
950 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
951 bool store_lanes_p, enum vect_def_type dt,
952 slp_tree slp_node,
953 stmt_vector_for_cost *prologue_cost_vec,
954 stmt_vector_for_cost *body_cost_vec)
956 int group_size;
957 unsigned int inside_cost = 0, prologue_cost = 0;
958 struct data_reference *first_dr;
959 gimple first_stmt;
961 if (dt == vect_constant_def || dt == vect_external_def)
962 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
963 stmt_info, 0, vect_prologue);
965 /* Grouped access? */
966 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
968 if (slp_node)
970 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
971 group_size = 1;
973 else
975 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
976 group_size = vect_cost_group_size (stmt_info);
979 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
981 /* Not a grouped access. */
982 else
984 group_size = 1;
985 first_dr = STMT_VINFO_DATA_REF (stmt_info);
988 /* We assume that the cost of a single store-lanes instruction is
989 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
990 access is instead being provided by a permute-and-store operation,
991 include the cost of the permutes. */
992 if (!store_lanes_p && group_size > 1
993 && !STMT_VINFO_STRIDED_P (stmt_info))
995 /* Uses high and low interleave or shuffle operations for each
996 needed permute. */
997 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
998 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
999 stmt_info, 0, vect_body);
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE, vect_location,
1003 "vect_model_store_cost: strided group_size = %d .\n",
1004 group_size);
1007 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1008 /* Costs of the stores. */
1009 if (STMT_VINFO_STRIDED_P (stmt_info)
1010 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1012 /* N scalar stores plus extracting the elements. */
1013 inside_cost += record_stmt_cost (body_cost_vec,
1014 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1015 scalar_store, stmt_info, 0, vect_body);
1017 else
1018 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1020 if (STMT_VINFO_STRIDED_P (stmt_info))
1021 inside_cost += record_stmt_cost (body_cost_vec,
1022 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1023 vec_to_scalar, stmt_info, 0, vect_body);
1025 if (dump_enabled_p ())
1026 dump_printf_loc (MSG_NOTE, vect_location,
1027 "vect_model_store_cost: inside_cost = %d, "
1028 "prologue_cost = %d .\n", inside_cost, prologue_cost);
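/* Worked example: a permute-and-store of a group of 4 vectors with
   ncopies == 1 gives nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm stmts,
   on top of the vector_store/unaligned_store costs added by
   vect_get_store_cost.  */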
1032 /* Calculate cost of DR's memory access. */
1033 void
1034 vect_get_store_cost (struct data_reference *dr, int ncopies,
1035 unsigned int *inside_cost,
1036 stmt_vector_for_cost *body_cost_vec)
1038 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1039 gimple stmt = DR_STMT (dr);
1040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1042 switch (alignment_support_scheme)
1044 case dr_aligned:
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vector_store, stmt_info, 0,
1048 vect_body);
1050 if (dump_enabled_p ())
1051 dump_printf_loc (MSG_NOTE, vect_location,
1052 "vect_model_store_cost: aligned.\n");
1053 break;
1056 case dr_unaligned_supported:
1058 /* Here, we assign an additional cost for the unaligned store. */
1059 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1060 unaligned_store, stmt_info,
1061 DR_MISALIGNMENT (dr), vect_body);
1062 if (dump_enabled_p ())
1063 dump_printf_loc (MSG_NOTE, vect_location,
1064 "vect_model_store_cost: unaligned supported by "
1065 "hardware.\n");
1066 break;
1069 case dr_unaligned_unsupported:
1071 *inside_cost = VECT_MAX_COST;
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1075 "vect_model_store_cost: unsupported access.\n");
1076 break;
1079 default:
1080 gcc_unreachable ();
1085 /* Function vect_model_load_cost
1087 Models cost for loads. In the case of grouped accesses, the last access
1088 has the overhead of the grouped access attributed to it. Since unaligned
1089 accesses are supported for loads, we also account for the costs of the
1090 access scheme chosen. */
1092 void
1093 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1094 bool load_lanes_p, slp_tree slp_node,
1095 stmt_vector_for_cost *prologue_cost_vec,
1096 stmt_vector_for_cost *body_cost_vec)
1098 int group_size;
1099 gimple first_stmt;
1100 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1101 unsigned int inside_cost = 0, prologue_cost = 0;
1103 /* Grouped accesses? */
1104 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1105 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1107 group_size = vect_cost_group_size (stmt_info);
1108 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1110 /* Not a grouped access. */
1111 else
1113 group_size = 1;
1114 first_dr = dr;
1117 /* We assume that the cost of a single load-lanes instruction is
1118 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1119 access is instead being provided by a load-and-permute operation,
1120 include the cost of the permutes. */
1121 if (!load_lanes_p && group_size > 1
1122 && !STMT_VINFO_STRIDED_P (stmt_info))
1124 /* Uses even and odd extract operations or shuffle operations
1125 for each needed permute. */
1126 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1127 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1128 stmt_info, 0, vect_body);
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE, vect_location,
1132 "vect_model_load_cost: strided group_size = %d .\n",
1133 group_size);
1136 /* The loads themselves. */
1137 if (STMT_VINFO_STRIDED_P (stmt_info)
1138 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1140 /* N scalar loads plus gathering them into a vector. */
1141 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1142 inside_cost += record_stmt_cost (body_cost_vec,
1143 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1144 scalar_load, stmt_info, 0, vect_body);
1146 else
1147 vect_get_load_cost (first_dr, ncopies,
1148 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1149 || group_size > 1 || slp_node),
1150 &inside_cost, &prologue_cost,
1151 prologue_cost_vec, body_cost_vec, true);
1152 if (STMT_VINFO_STRIDED_P (stmt_info))
1153 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1154 stmt_info, 0, vect_body);
1156 if (dump_enabled_p ())
1157 dump_printf_loc (MSG_NOTE, vect_location,
1158 "vect_model_load_cost: inside_cost = %d, "
1159 "prologue_cost = %d .\n", inside_cost, prologue_cost);
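/* Worked example: a non-grouped strided load of a 4-element vector type
   with ncopies == 2 is costed above as 2 * 4 = 8 scalar_load stmts plus
   2 vec_construct stmts for assembling the vectors.  */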
1163 /* Calculate cost of DR's memory access. */
1164 void
1165 vect_get_load_cost (struct data_reference *dr, int ncopies,
1166 bool add_realign_cost, unsigned int *inside_cost,
1167 unsigned int *prologue_cost,
1168 stmt_vector_for_cost *prologue_cost_vec,
1169 stmt_vector_for_cost *body_cost_vec,
1170 bool record_prologue_costs)
1172 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1173 gimple stmt = DR_STMT (dr);
1174 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1176 switch (alignment_support_scheme)
1178 case dr_aligned:
1180 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1181 stmt_info, 0, vect_body);
1183 if (dump_enabled_p ())
1184 dump_printf_loc (MSG_NOTE, vect_location,
1185 "vect_model_load_cost: aligned.\n");
1187 break;
1189 case dr_unaligned_supported:
1191 /* Here, we assign an additional cost for the unaligned load. */
1192 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1193 unaligned_load, stmt_info,
1194 DR_MISALIGNMENT (dr), vect_body);
1196 if (dump_enabled_p ())
1197 dump_printf_loc (MSG_NOTE, vect_location,
1198 "vect_model_load_cost: unaligned supported by "
1199 "hardware.\n");
1201 break;
1203 case dr_explicit_realign:
1205 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1206 vector_load, stmt_info, 0, vect_body);
1207 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1208 vec_perm, stmt_info, 0, vect_body);
1210 /* FIXME: If the misalignment remains fixed across the iterations of
1211 the containing loop, the following cost should be added to the
1212 prologue costs. */
1213 if (targetm.vectorize.builtin_mask_for_load)
1214 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1215 stmt_info, 0, vect_body);
1217 if (dump_enabled_p ())
1218 dump_printf_loc (MSG_NOTE, vect_location,
1219 "vect_model_load_cost: explicit realign\n");
1221 break;
1223 case dr_explicit_realign_optimized:
1225 if (dump_enabled_p ())
1226 dump_printf_loc (MSG_NOTE, vect_location,
1227 "vect_model_load_cost: unaligned software "
1228 "pipelined.\n");
1230 /* Unaligned software pipeline has a load of an address, an initial
1231 load, and possibly a mask operation to "prime" the loop. However,
1232 if this is an access in a group of loads, which provide grouped
1233 access, then the above cost should only be considered for one
1234 access in the group. Inside the loop, there is a load op
1235 and a realignment op. */
1237 if (add_realign_cost && record_prologue_costs)
1239 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1240 vector_stmt, stmt_info,
1241 0, vect_prologue);
1242 if (targetm.vectorize.builtin_mask_for_load)
1243 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1244 vector_stmt, stmt_info,
1245 0, vect_prologue);
1248 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1249 stmt_info, 0, vect_body);
1250 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1251 stmt_info, 0, vect_body);
1253 if (dump_enabled_p ())
1254 dump_printf_loc (MSG_NOTE, vect_location,
1255 "vect_model_load_cost: explicit realign optimized"
1256 "\n");
1258 break;
1261 case dr_unaligned_unsupported:
1263 *inside_cost = VECT_MAX_COST;
1265 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1267 "vect_model_load_cost: unsupported access.\n");
1268 break;
1271 default:
1272 gcc_unreachable ();
1276 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1277 the loop preheader for the vectorized stmt STMT. */
1279 static void
1280 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1282 if (gsi)
1283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1284 else
1286 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1287 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1289 if (loop_vinfo)
1291 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1292 basic_block new_bb;
1293 edge pe;
1295 if (nested_in_vect_loop_p (loop, stmt))
1296 loop = loop->inner;
1298 pe = loop_preheader_edge (loop);
1299 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1300 gcc_assert (!new_bb);
1302 else
1304 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1305 basic_block bb;
1306 gimple_stmt_iterator gsi_bb_start;
1308 gcc_assert (bb_vinfo);
1309 bb = BB_VINFO_BB (bb_vinfo);
1310 gsi_bb_start = gsi_after_labels (bb);
1311 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1315 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_NOTE, vect_location,
1318 "created new init_stmt: ");
1319 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1323 /* Function vect_init_vector.
1325 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1326 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1327 a vector type, a vector with all elements equal to VAL is created first.
1328 Place the initialization at GSI if it is not NULL. Otherwise, place the
1329 initialization at the loop preheader.
1330 Return the DEF of INIT_STMT.
1331 It will be used in the vectorization of STMT. */
1333 tree
1334 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1336 tree new_var;
1337 gimple init_stmt;
1338 tree vec_oprnd;
1339 tree new_temp;
1341 if (TREE_CODE (type) == VECTOR_TYPE
1342 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1344 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1346 if (CONSTANT_CLASS_P (val))
1347 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1348 else
1350 new_temp = make_ssa_name (TREE_TYPE (type));
1351 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1352 vect_init_vector_1 (stmt, init_stmt, gsi);
1353 val = new_temp;
1356 val = build_vector_from_val (type, val);
1359 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1360 init_stmt = gimple_build_assign (new_var, val);
1361 new_temp = make_ssa_name (new_var, init_stmt);
1362 gimple_assign_set_lhs (init_stmt, new_temp);
1363 vect_init_vector_1 (stmt, init_stmt, gsi);
1364 vec_oprnd = gimple_assign_lhs (init_stmt);
1365 return vec_oprnd;
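/* For example (a sketch, with a made-up SSA name): initializing a 4-element
   integer vector from the scalar constant 5 when no GSI is given emits

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader and returns cst_1 for use by the vectorized
   stmt.  */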
1369 /* Function vect_get_vec_def_for_operand.
1371 OP is an operand in STMT. This function returns a (vector) def that will be
1372 used in the vectorized stmt for STMT.
1374 In the case that OP is an SSA_NAME which is defined in the loop, then
1375 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1377 In case OP is an invariant or constant, a new stmt that creates a vector def
1378 needs to be introduced. */
1380 tree
1381 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1383 tree vec_oprnd;
1384 gimple vec_stmt;
1385 gimple def_stmt;
1386 stmt_vec_info def_stmt_info = NULL;
1387 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1388 unsigned int nunits;
1389 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1390 tree def;
1391 enum vect_def_type dt;
1392 bool is_simple_use;
1393 tree vector_type;
1395 if (dump_enabled_p ())
1397 dump_printf_loc (MSG_NOTE, vect_location,
1398 "vect_get_vec_def_for_operand: ");
1399 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1400 dump_printf (MSG_NOTE, "\n");
1403 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1404 &def_stmt, &def, &dt);
1405 gcc_assert (is_simple_use);
1406 if (dump_enabled_p ())
1408 int loc_printed = 0;
1409 if (def)
1411 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1412 loc_printed = 1;
1413 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1414 dump_printf (MSG_NOTE, "\n");
1416 if (def_stmt)
1418 if (loc_printed)
1419 dump_printf (MSG_NOTE, " def_stmt = ");
1420 else
1421 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1422 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1426 switch (dt)
1428 /* Case 1: operand is a constant. */
1429 case vect_constant_def:
1431 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1432 gcc_assert (vector_type);
1433 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1435 if (scalar_def)
1436 *scalar_def = op;
1438 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE, vect_location,
1441 "Create vector_cst. nunits = %d\n", nunits);
1443 return vect_init_vector (stmt, op, vector_type, NULL);
1446 /* Case 2: operand is defined outside the loop - loop invariant. */
1447 case vect_external_def:
1449 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1450 gcc_assert (vector_type);
1452 if (scalar_def)
1453 *scalar_def = def;
1455 /* Create 'vec_inv = {inv,inv,..,inv}' */
1456 if (dump_enabled_p ())
1457 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1459 return vect_init_vector (stmt, def, vector_type, NULL);
1462 /* Case 3: operand is defined inside the loop. */
1463 case vect_internal_def:
1465 if (scalar_def)
1466 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1468 /* Get the def from the vectorized stmt. */
1469 def_stmt_info = vinfo_for_stmt (def_stmt);
1471 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1472 /* Get vectorized pattern statement. */
1473 if (!vec_stmt
1474 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1475 && !STMT_VINFO_RELEVANT (def_stmt_info))
1476 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1477 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1478 gcc_assert (vec_stmt);
1479 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1480 vec_oprnd = PHI_RESULT (vec_stmt);
1481 else if (is_gimple_call (vec_stmt))
1482 vec_oprnd = gimple_call_lhs (vec_stmt);
1483 else
1484 vec_oprnd = gimple_assign_lhs (vec_stmt);
1485 return vec_oprnd;
1488 /* Case 4: operand is defined by a loop header phi - reduction */
1489 case vect_reduction_def:
1490 case vect_double_reduction_def:
1491 case vect_nested_cycle:
1493 struct loop *loop;
1495 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1496 loop = (gimple_bb (def_stmt))->loop_father;
1498 /* Get the def before the loop */
1499 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1500 return get_initial_def_for_reduction (stmt, op, scalar_def);
1503 /* Case 5: operand is defined by loop-header phi - induction. */
1504 case vect_induction_def:
1506 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1508 /* Get the def from the vectorized stmt. */
1509 def_stmt_info = vinfo_for_stmt (def_stmt);
1510 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1511 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1512 vec_oprnd = PHI_RESULT (vec_stmt);
1513 else
1514 vec_oprnd = gimple_get_lhs (vec_stmt);
1515 return vec_oprnd;
1518 default:
1519 gcc_unreachable ();
1524 /* Function vect_get_vec_def_for_stmt_copy
1526 Return a vector-def for an operand. This function is used when the
1527 vectorized stmt to be created (by the caller to this function) is a "copy"
1528 created in case the vectorized result cannot fit in one vector, and several
1529 copies of the vector-stmt are required. In this case the vector-def is
1530 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1531 of the stmt that defines VEC_OPRND.
1532 DT is the type of the vector def VEC_OPRND.
1534 Context:
1535 In case the vectorization factor (VF) is bigger than the number
1536 of elements that can fit in a vectype (nunits), we have to generate
1537 more than one vector stmt to vectorize the scalar stmt. This situation
1538 arises when there are multiple data-types operated upon in the loop; the
1539 smallest data-type determines the VF, and as a result, when vectorizing
1540 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1541 vector stmt (each computing a vector of 'nunits' results, and together
1542 computing 'VF' results in each iteration). This function is called when
1543 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1544 which VF=16 and nunits=4, so the number of copies required is 4):
1546 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1548 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1549 VS1.1: vx.1 = memref1 VS1.2
1550 VS1.2: vx.2 = memref2 VS1.3
1551 VS1.3: vx.3 = memref3
1553 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1554 VSnew.1: vz1 = vx.1 + ... VSnew.2
1555 VSnew.2: vz2 = vx.2 + ... VSnew.3
1556 VSnew.3: vz3 = vx.3 + ...
1558 The vectorization of S1 is explained in vectorizable_load.
1559 The vectorization of S2:
1560 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1561 the function 'vect_get_vec_def_for_operand' is called to
1562 get the relevant vector-def for each operand of S2. For operand x it
1563 returns the vector-def 'vx.0'.
1565 To create the remaining copies of the vector-stmt (VSnew.j), this
1566 function is called to get the relevant vector-def for each operand. It is
1567 obtained from the respective VS1.j stmt, which is recorded in the
1568 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1570 For example, to obtain the vector-def 'vx.1' in order to create the
1571 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1572 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1573 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1574 and return its def ('vx.1').
1575 Overall, to create the above sequence this function will be called 3 times:
1576 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1577 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1578 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1580 tree
1581 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1583 gimple vec_stmt_for_operand;
1584 stmt_vec_info def_stmt_info;
1586 /* Do nothing; can reuse same def. */
1587 if (dt == vect_external_def || dt == vect_constant_def )
1588 return vec_oprnd;
1590 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1591 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1592 gcc_assert (def_stmt_info);
1593 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1594 gcc_assert (vec_stmt_for_operand);
1595 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1596 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1597 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1598 else
1599 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1600 return vec_oprnd;
1604 /* Get vectorized definitions for the operands to create a copy of an original
1605 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1607 static void
1608 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1609 vec<tree> *vec_oprnds0,
1610 vec<tree> *vec_oprnds1)
1612 tree vec_oprnd = vec_oprnds0->pop ();
1614 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1615 vec_oprnds0->quick_push (vec_oprnd);
1617 if (vec_oprnds1 && vec_oprnds1->length ())
1619 vec_oprnd = vec_oprnds1->pop ();
1620 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1621 vec_oprnds1->quick_push (vec_oprnd);
1626 /* Get vectorized definitions for OP0 and OP1.
1627 REDUC_INDEX is the index of reduction operand in case of reduction,
1628 and -1 otherwise. */
1630 void
1631 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1632 vec<tree> *vec_oprnds0,
1633 vec<tree> *vec_oprnds1,
1634 slp_tree slp_node, int reduc_index)
1636 if (slp_node)
1638 int nops = (op1 == NULL_TREE) ? 1 : 2;
1639 auto_vec<tree> ops (nops);
1640 auto_vec<vec<tree> > vec_defs (nops);
1642 ops.quick_push (op0);
1643 if (op1)
1644 ops.quick_push (op1);
1646 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1648 *vec_oprnds0 = vec_defs[0];
1649 if (op1)
1650 *vec_oprnds1 = vec_defs[1];
1652 else
1654 tree vec_oprnd;
1656 vec_oprnds0->create (1);
1657 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1658 vec_oprnds0->quick_push (vec_oprnd);
1660 if (op1)
1662 vec_oprnds1->create (1);
1663 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1664 vec_oprnds1->quick_push (vec_oprnd);
1670 /* Function vect_finish_stmt_generation.
1672 Insert a new stmt. */
1674 void
1675 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1676 gimple_stmt_iterator *gsi)
1678 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1679 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1680 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1682 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1684 if (!gsi_end_p (*gsi)
1685 && gimple_has_mem_ops (vec_stmt))
1687 gimple at_stmt = gsi_stmt (*gsi);
1688 tree vuse = gimple_vuse (at_stmt);
1689 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1691 tree vdef = gimple_vdef (at_stmt);
1692 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1693 /* If we have an SSA vuse and insert a store, update virtual
1694 SSA form to avoid triggering the renamer. Do so only
1695 if we can easily see all uses - which is what almost always
1696 happens with the way vectorized stmts are inserted. */
1697 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1698 && ((is_gimple_assign (vec_stmt)
1699 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1700 || (is_gimple_call (vec_stmt)
1701 && !(gimple_call_flags (vec_stmt)
1702 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1704 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1705 gimple_set_vdef (vec_stmt, new_vdef);
1706 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1710 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1712 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1713 bb_vinfo));
1715 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1718 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1721 gimple_set_location (vec_stmt, gimple_location (stmt));
1723 /* While EH edges will generally prevent vectorization, stmt might
1724 e.g. be in a must-not-throw region. Ensure newly created stmts
1725 that could throw are part of the same region. */
1726 int lp_nr = lookup_stmt_eh_lp (stmt);
1727 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1728 add_stmt_to_eh_lp (vec_stmt, lp_nr);
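/* Illustrative sketch of the virtual operand update above (made-up names):
   if the vectorized store is inserted before a scalar stmt carrying
   "# VUSE <.MEM_1>", the new store becomes "# .MEM_2 = VDEF <.MEM_1>"
   with .MEM_2 a fresh name copied from the vuse, and the scalar stmt's
   vuse is rewired to .MEM_2, keeping virtual SSA form valid without
   running the renamer.  */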
1731 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1732 a function declaration if the target has a vectorized version
1733 of the function, or NULL_TREE if the function cannot be vectorized. */
1735 tree
1736 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1738 tree fndecl = gimple_call_fndecl (call);
1740 /* We only handle functions that do not read or clobber memory -- i.e.
1741 const or novops ones. */
1742 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1743 return NULL_TREE;
1745 if (!fndecl
1746 || TREE_CODE (fndecl) != FUNCTION_DECL
1747 || !DECL_BUILT_IN (fndecl))
1748 return NULL_TREE;
1750 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1751 vectype_in);
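/* For example, a call to a const built-in such as sqrt can be mapped to a
   target-provided vector variant when the target hook returns a decl for
   the given vector types; calls to non-built-ins, or to functions that may
   read or clobber memory, yield NULL_TREE here.  */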
1755 static tree permute_vec_elements (tree, tree, tree, gimple,
1756 gimple_stmt_iterator *);
1759 /* Function vectorizable_mask_load_store.
1761 Check if STMT performs a conditional load or store that can be vectorized.
1762 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1763 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1764 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1766 static bool
1767 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1768 gimple *vec_stmt, slp_tree slp_node)
1770 tree vec_dest = NULL;
1771 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1772 stmt_vec_info prev_stmt_info;
1773 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1774 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1775 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1776 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1777 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1778 tree elem_type;
1779 gimple new_stmt;
1780 tree dummy;
1781 tree dataref_ptr = NULL_TREE;
1782 gimple ptr_incr;
1783 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1784 int ncopies;
1785 int i, j;
1786 bool inv_p;
1787 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1788 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1789 int gather_scale = 1;
1790 enum vect_def_type gather_dt = vect_unknown_def_type;
1791 bool is_store;
1792 tree mask;
1793 gimple def_stmt;
1794 tree def;
1795 enum vect_def_type dt;
1797 if (slp_node != NULL)
1798 return false;
1800 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1801 gcc_assert (ncopies >= 1);
1803 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1804 mask = gimple_call_arg (stmt, 2);
1805 if (TYPE_PRECISION (TREE_TYPE (mask))
1806 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1807 return false;
1809 /* FORNOW. This restriction should be relaxed. */
1810 if (nested_in_vect_loop && ncopies > 1)
1812 if (dump_enabled_p ())
1813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1814 "multiple types in nested loop.");
1815 return false;
1818 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1819 return false;
1821 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1822 return false;
1824 if (!STMT_VINFO_DATA_REF (stmt_info))
1825 return false;
1827 elem_type = TREE_TYPE (vectype);
1829 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1830 return false;
1832 if (STMT_VINFO_STRIDED_P (stmt_info))
1833 return false;
1835 if (STMT_VINFO_GATHER_P (stmt_info))
1837 gimple def_stmt;
1838 tree def;
1839 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1840 &gather_off, &gather_scale);
1841 gcc_assert (gather_decl);
1842 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1843 &def_stmt, &def, &gather_dt,
1844 &gather_off_vectype))
1846 if (dump_enabled_p ())
1847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1848 "gather index use not simple.");
1849 return false;
1852 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1853 tree masktype
1854 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1855 if (TREE_CODE (masktype) == INTEGER_TYPE)
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1859 "masked gather with integer mask not supported.");
1860 return false;
1863 else if (tree_int_cst_compare (nested_in_vect_loop
1864 ? STMT_VINFO_DR_STEP (stmt_info)
1865 : DR_STEP (dr), size_zero_node) <= 0)
1866 return false;
1867 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1868 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1869 return false;
1871 if (TREE_CODE (mask) != SSA_NAME)
1872 return false;
1874 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1875 &def_stmt, &def, &dt))
1876 return false;
1878 if (is_store)
1880 tree rhs = gimple_call_arg (stmt, 3);
1881 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1882 &def_stmt, &def, &dt))
1883 return false;
1886 if (!vec_stmt) /* transformation not required. */
1888 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1889 if (is_store)
1890 vect_model_store_cost (stmt_info, ncopies, false, dt,
1891 NULL, NULL, NULL);
1892 else
1893 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1894 return true;
1897 /** Transform. **/
1899 if (STMT_VINFO_GATHER_P (stmt_info))
1901 tree vec_oprnd0 = NULL_TREE, op;
1902 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1903 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1904 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1905 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1906 tree mask_perm_mask = NULL_TREE;
1907 edge pe = loop_preheader_edge (loop);
1908 gimple_seq seq;
1909 basic_block new_bb;
1910 enum { NARROW, NONE, WIDEN } modifier;
1911 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1913 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1914 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1917 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1918 scaletype = TREE_VALUE (arglist);
1919 gcc_checking_assert (types_compatible_p (srctype, rettype)
1920 && types_compatible_p (srctype, masktype));
1922 if (nunits == gather_off_nunits)
1923 modifier = NONE;
1924 else if (nunits == gather_off_nunits / 2)
1926 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1927 modifier = WIDEN;
1929 for (i = 0; i < gather_off_nunits; ++i)
1930 sel[i] = i | nunits;
1932 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1934 else if (nunits == gather_off_nunits * 2)
1936 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1937 modifier = NARROW;
1939 for (i = 0; i < nunits; ++i)
1940 sel[i] = i < gather_off_nunits
1941 ? i : i + nunits - gather_off_nunits;
1943 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1944 ncopies *= 2;
1945 for (i = 0; i < nunits; ++i)
1946 sel[i] = i | gather_off_nunits;
1947 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1949 else
1950 gcc_unreachable ();
1952 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1954 ptr = fold_convert (ptrtype, gather_base);
1955 if (!is_gimple_min_invariant (ptr))
1957 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1958 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1959 gcc_assert (!new_bb);
1962 scale = build_int_cst (scaletype, gather_scale);
1964 prev_stmt_info = NULL;
1965 for (j = 0; j < ncopies; ++j)
1967 if (modifier == WIDEN && (j & 1))
1968 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1969 perm_mask, stmt, gsi);
1970 else if (j == 0)
1971 op = vec_oprnd0
1972 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1973 else
1974 op = vec_oprnd0
1975 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1977 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1979 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1980 == TYPE_VECTOR_SUBPARTS (idxtype));
1981 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1982 var = make_ssa_name (var);
1983 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1984 new_stmt
1985 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1987 op = var;
1990 if (mask_perm_mask && (j & 1))
1991 mask_op = permute_vec_elements (mask_op, mask_op,
1992 mask_perm_mask, stmt, gsi);
1993 else
1995 if (j == 0)
1996 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1997 else
1999 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
2000 &def_stmt, &def, &dt);
2001 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2004 mask_op = vec_mask;
2005 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2007 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2008 == TYPE_VECTOR_SUBPARTS (masktype));
2009 var = vect_get_new_vect_var (masktype, vect_simple_var,
2010 NULL);
2011 var = make_ssa_name (var);
2012 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2013 new_stmt
2014 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2015 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2016 mask_op = var;
2020 new_stmt
2021 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2022 scale);
2024 if (!useless_type_conversion_p (vectype, rettype))
2026 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2027 == TYPE_VECTOR_SUBPARTS (rettype));
2028 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2029 op = make_ssa_name (var, new_stmt);
2030 gimple_call_set_lhs (new_stmt, op);
2031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2032 var = make_ssa_name (vec_dest);
2033 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2034 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2036 else
2038 var = make_ssa_name (vec_dest, new_stmt);
2039 gimple_call_set_lhs (new_stmt, var);
2042 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2044 if (modifier == NARROW)
2046 if ((j & 1) == 0)
2048 prev_res = var;
2049 continue;
2051 var = permute_vec_elements (prev_res, var,
2052 perm_mask, stmt, gsi);
2053 new_stmt = SSA_NAME_DEF_STMT (var);
2056 if (prev_stmt_info == NULL)
2057 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2058 else
2059 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2060 prev_stmt_info = vinfo_for_stmt (new_stmt);
2063 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2064 from the IL. */
2065 tree lhs = gimple_call_lhs (stmt);
2066 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2067 set_vinfo_for_stmt (new_stmt, stmt_info);
2068 set_vinfo_for_stmt (stmt, NULL);
2069 STMT_VINFO_STMT (stmt_info) = new_stmt;
2070 gsi_replace (gsi, new_stmt, true);
2071 return true;
2073 else if (is_store)
2075 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2076 prev_stmt_info = NULL;
2077 for (i = 0; i < ncopies; i++)
2079 unsigned align, misalign;
2081 if (i == 0)
2083 tree rhs = gimple_call_arg (stmt, 3);
2084 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2085 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2086 /* We should have caught mismatched types earlier. */
2087 gcc_assert (useless_type_conversion_p (vectype,
2088 TREE_TYPE (vec_rhs)));
2089 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2090 NULL_TREE, &dummy, gsi,
2091 &ptr_incr, false, &inv_p);
2092 gcc_assert (!inv_p);
2094 else
2096 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2097 &def, &dt);
2098 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2099 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2100 &def, &dt);
2101 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2102 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2103 TYPE_SIZE_UNIT (vectype));
2106 align = TYPE_ALIGN_UNIT (vectype);
2107 if (aligned_access_p (dr))
2108 misalign = 0;
2109 else if (DR_MISALIGNMENT (dr) == -1)
2111 align = TYPE_ALIGN_UNIT (elem_type);
2112 misalign = 0;
2114 else
2115 misalign = DR_MISALIGNMENT (dr);
2116 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2117 misalign);
2118 new_stmt
2119 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2120 gimple_call_arg (stmt, 1),
2121 vec_mask, vec_rhs);
2122 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2123 if (i == 0)
2124 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2125 else
2126 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2127 prev_stmt_info = vinfo_for_stmt (new_stmt);
2130 else
2132 tree vec_mask = NULL_TREE;
2133 prev_stmt_info = NULL;
2134 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2135 for (i = 0; i < ncopies; i++)
2137 unsigned align, misalign;
2139 if (i == 0)
2141 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2142 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2143 NULL_TREE, &dummy, gsi,
2144 &ptr_incr, false, &inv_p);
2145 gcc_assert (!inv_p);
2147 else
2149 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2150 &def, &dt);
2151 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2152 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2153 TYPE_SIZE_UNIT (vectype));
2156 align = TYPE_ALIGN_UNIT (vectype);
2157 if (aligned_access_p (dr))
2158 misalign = 0;
2159 else if (DR_MISALIGNMENT (dr) == -1)
2161 align = TYPE_ALIGN_UNIT (elem_type);
2162 misalign = 0;
2164 else
2165 misalign = DR_MISALIGNMENT (dr);
2166 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2167 misalign);
2168 new_stmt
2169 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2170 gimple_call_arg (stmt, 1),
2171 vec_mask);
2172 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2174 if (i == 0)
2175 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2176 else
2177 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2178 prev_stmt_info = vinfo_for_stmt (new_stmt);
2182 if (!is_store)
2184 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2185 from the IL. */
2186 tree lhs = gimple_call_lhs (stmt);
2187 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2188 set_vinfo_for_stmt (new_stmt, stmt_info);
2189 set_vinfo_for_stmt (stmt, NULL);
2190 STMT_VINFO_STMT (stmt_info) = new_stmt;
2191 gsi_replace (gsi, new_stmt, true);
2194 return true;
2198 /* Function vectorizable_call.
2200 Check if GS performs a function call that can be vectorized.
2201 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2202 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2203 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
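/* A sketch for illustration (names are invented; the actual builtin is
supplied by targetm.vectorize.builtin_vectorized_function): with a
target providing a vector sqrt builtin, a scalar statement

    x_5 = __builtin_sqrt (y_3);

in a loop vectorized with V2DF could become something like

    vect_x.7 = __builtin_ia32_sqrtpd (vect_y.6);

generated once per copy of the vectorized statement.  */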
2205 static bool
2206 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2207 slp_tree slp_node)
2209 gcall *stmt;
2210 tree vec_dest;
2211 tree scalar_dest;
2212 tree op, type;
2213 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2214 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2215 tree vectype_out, vectype_in;
2216 int nunits_in;
2217 int nunits_out;
2218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2219 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2220 tree fndecl, new_temp, def, rhs_type;
2221 gimple def_stmt;
2222 enum vect_def_type dt[3]
2223 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2224 gimple new_stmt = NULL;
2225 int ncopies, j;
2226 vec<tree> vargs = vNULL;
2227 enum { NARROW, NONE, WIDEN } modifier;
2228 size_t i, nargs;
2229 tree lhs;
2231 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2232 return false;
2234 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2235 return false;
2237 /* Is GS a vectorizable call? */
2238 stmt = dyn_cast <gcall *> (gs);
2239 if (!stmt)
2240 return false;
2242 if (gimple_call_internal_p (stmt)
2243 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2244 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2245 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2246 slp_node);
2248 if (gimple_call_lhs (stmt) == NULL_TREE
2249 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2250 return false;
2252 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2254 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2256 /* Process function arguments. */
2257 rhs_type = NULL_TREE;
2258 vectype_in = NULL_TREE;
2259 nargs = gimple_call_num_args (stmt);
2261 /* Bail out if the function has more than three arguments; we do not have
2262 interesting builtin functions to vectorize with more than two arguments
2263 except for fma. A call with no arguments is not interesting either. */
2264 if (nargs == 0 || nargs > 3)
2265 return false;
2267 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2268 if (gimple_call_internal_p (stmt)
2269 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2271 nargs = 0;
2272 rhs_type = unsigned_type_node;
2275 for (i = 0; i < nargs; i++)
2277 tree opvectype;
2279 op = gimple_call_arg (stmt, i);
2281 /* We can only handle calls with arguments of the same type. */
2282 if (rhs_type
2283 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "argument types differ.\n");
2288 return false;
2290 if (!rhs_type)
2291 rhs_type = TREE_TYPE (op);
2293 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2294 &def_stmt, &def, &dt[i], &opvectype))
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "use not simple.\n");
2299 return false;
2302 if (!vectype_in)
2303 vectype_in = opvectype;
2304 else if (opvectype
2305 && opvectype != vectype_in)
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2309 "argument vector types differ.\n");
2310 return false;
2313 /* If all arguments are external or constant defs, use a vector type with
2314 the same size as the output vector type. */
2315 if (!vectype_in)
2316 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2317 if (vec_stmt)
2318 gcc_assert (vectype_in);
2319 if (!vectype_in)
2321 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "no vectype for scalar type ");
2325 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2326 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2329 return false;
2332 /* FORNOW */
2333 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2334 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2335 if (nunits_in == nunits_out / 2)
2336 modifier = NARROW;
2337 else if (nunits_out == nunits_in)
2338 modifier = NONE;
2339 else if (nunits_out == nunits_in / 2)
2340 modifier = WIDEN;
2341 else
2342 return false;
2344 /* For now, we only vectorize functions if a target specific builtin
2345 is available. TODO -- in some cases, it might be profitable to
2346 insert the calls for pieces of the vector, in order to be able
2347 to vectorize other operations in the loop. */
2348 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2349 if (fndecl == NULL_TREE)
2351 if (gimple_call_internal_p (stmt)
2352 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2353 && !slp_node
2354 && loop_vinfo
2355 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2356 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2357 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2358 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2360 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2361 { 0, 1, 2, ... vf - 1 } vector. */
2362 gcc_assert (nargs == 0);
2364 else
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368 "function is not vectorizable.\n");
2369 return false;
2373 gcc_assert (!gimple_vuse (stmt));
2375 if (slp_node || PURE_SLP_STMT (stmt_info))
2376 ncopies = 1;
2377 else if (modifier == NARROW)
2378 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2379 else
2380 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2382 /* Sanity check: make sure that at least one copy of the vectorized stmt
2383 needs to be generated. */
2384 gcc_assert (ncopies >= 1);
2386 if (!vec_stmt) /* transformation not required. */
2388 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2391 "\n");
2392 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2393 return true;
2396 /** Transform. **/
2398 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2401 /* Handle def. */
2402 scalar_dest = gimple_call_lhs (stmt);
2403 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2405 prev_stmt_info = NULL;
2406 switch (modifier)
2408 case NONE:
2409 for (j = 0; j < ncopies; ++j)
2411 /* Build argument list for the vectorized call. */
2412 if (j == 0)
2413 vargs.create (nargs);
2414 else
2415 vargs.truncate (0);
2417 if (slp_node)
2419 auto_vec<vec<tree> > vec_defs (nargs);
2420 vec<tree> vec_oprnds0;
2422 for (i = 0; i < nargs; i++)
2423 vargs.quick_push (gimple_call_arg (stmt, i));
2424 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2425 vec_oprnds0 = vec_defs[0];
2427 /* Arguments are ready. Create the new vector stmt. */
2428 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2430 size_t k;
2431 for (k = 0; k < nargs; k++)
2433 vec<tree> vec_oprndsk = vec_defs[k];
2434 vargs[k] = vec_oprndsk[i];
2436 new_stmt = gimple_build_call_vec (fndecl, vargs);
2437 new_temp = make_ssa_name (vec_dest, new_stmt);
2438 gimple_call_set_lhs (new_stmt, new_temp);
2439 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2440 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2443 for (i = 0; i < nargs; i++)
2445 vec<tree> vec_oprndsi = vec_defs[i];
2446 vec_oprndsi.release ();
2448 continue;
2451 for (i = 0; i < nargs; i++)
2453 op = gimple_call_arg (stmt, i);
2454 if (j == 0)
2455 vec_oprnd0
2456 = vect_get_vec_def_for_operand (op, stmt, NULL);
2457 else
2459 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2460 vec_oprnd0
2461 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2464 vargs.quick_push (vec_oprnd0);
2467 if (gimple_call_internal_p (stmt)
2468 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2470 tree *v = XALLOCAVEC (tree, nunits_out);
2471 int k;
2472 for (k = 0; k < nunits_out; ++k)
2473 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2474 tree cst = build_vector (vectype_out, v);
2475 tree new_var
2476 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2477 gimple init_stmt = gimple_build_assign (new_var, cst);
2478 new_temp = make_ssa_name (new_var, init_stmt);
2479 gimple_assign_set_lhs (init_stmt, new_temp);
2480 vect_init_vector_1 (stmt, init_stmt, NULL);
2481 new_temp = make_ssa_name (vec_dest);
2482 new_stmt = gimple_build_assign (new_temp,
2483 gimple_assign_lhs (init_stmt));
2485 else
2487 new_stmt = gimple_build_call_vec (fndecl, vargs);
2488 new_temp = make_ssa_name (vec_dest, new_stmt);
2489 gimple_call_set_lhs (new_stmt, new_temp);
2491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2493 if (j == 0)
2494 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2495 else
2496 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2498 prev_stmt_info = vinfo_for_stmt (new_stmt);
2501 break;
2503 case NARROW:
2504 for (j = 0; j < ncopies; ++j)
2506 /* Build argument list for the vectorized call. */
2507 if (j == 0)
2508 vargs.create (nargs * 2);
2509 else
2510 vargs.truncate (0);
2512 if (slp_node)
2514 auto_vec<vec<tree> > vec_defs (nargs);
2515 vec<tree> vec_oprnds0;
2517 for (i = 0; i < nargs; i++)
2518 vargs.quick_push (gimple_call_arg (stmt, i));
2519 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2520 vec_oprnds0 = vec_defs[0];
2522 /* Arguments are ready. Create the new vector stmt. */
2523 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2525 size_t k;
2526 vargs.truncate (0);
2527 for (k = 0; k < nargs; k++)
2529 vec<tree> vec_oprndsk = vec_defs[k];
2530 vargs.quick_push (vec_oprndsk[i]);
2531 vargs.quick_push (vec_oprndsk[i + 1]);
2533 new_stmt = gimple_build_call_vec (fndecl, vargs);
2534 new_temp = make_ssa_name (vec_dest, new_stmt);
2535 gimple_call_set_lhs (new_stmt, new_temp);
2536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2537 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2540 for (i = 0; i < nargs; i++)
2542 vec<tree> vec_oprndsi = vec_defs[i];
2543 vec_oprndsi.release ();
2545 continue;
2548 for (i = 0; i < nargs; i++)
2550 op = gimple_call_arg (stmt, i);
2551 if (j == 0)
2553 vec_oprnd0
2554 = vect_get_vec_def_for_operand (op, stmt, NULL);
2555 vec_oprnd1
2556 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2558 else
2560 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2561 vec_oprnd0
2562 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2563 vec_oprnd1
2564 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2567 vargs.quick_push (vec_oprnd0);
2568 vargs.quick_push (vec_oprnd1);
2571 new_stmt = gimple_build_call_vec (fndecl, vargs);
2572 new_temp = make_ssa_name (vec_dest, new_stmt);
2573 gimple_call_set_lhs (new_stmt, new_temp);
2574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2576 if (j == 0)
2577 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2578 else
2579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2581 prev_stmt_info = vinfo_for_stmt (new_stmt);
2584 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2586 break;
2588 case WIDEN:
2589 /* No current target implements this case. */
2590 return false;
2593 vargs.release ();
2595 /* The call in STMT might prevent it from being removed in dce.
2596 We however cannot remove it here, due to the way the ssa name
2597 it defines is mapped to the new definition. So just replace
2598 rhs of the statement with something harmless. */
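/* For instance (invented names), a scalar call that has just been
vectorized, say

    x_5 = __builtin_sqrt (y_3);

is left behind as the trivial assignment

    x_5 = 0.0;

which keeps x_5 defined while the vector statement created above does
the real work; DCE can then drop the leftover later.  */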
2600 if (slp_node)
2601 return true;
2603 type = TREE_TYPE (scalar_dest);
2604 if (is_pattern_stmt_p (stmt_info))
2605 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2606 else
2607 lhs = gimple_call_lhs (stmt);
2608 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2609 set_vinfo_for_stmt (new_stmt, stmt_info);
2610 set_vinfo_for_stmt (stmt, NULL);
2611 STMT_VINFO_STMT (stmt_info) = new_stmt;
2612 gsi_replace (gsi, new_stmt, false);
2614 return true;
2618 struct simd_call_arg_info
2620 tree vectype;
2621 tree op;
2622 enum vect_def_type dt;
2623 HOST_WIDE_INT linear_step;
2624 unsigned int align;
2627 /* Function vectorizable_simd_clone_call.
2629 Check if STMT performs a function call that can be vectorized
2630 by calling a simd clone of the function.
2631 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2632 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2633 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
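/* A sketch for illustration (function and clone names are invented):
for a function declared as

    #pragma omp declare simd
    int foo (int x);

a call  a_5 = foo (b_3);  in the vectorized loop can be replaced by a
call to one of the simd clones recorded for foo, e.g.

    vect_a.8 = foo.simdclone.0 (vect_b.7);

where the clone takes and returns whole vectors; the code below picks
the cheapest clone the target accepts via targetm.simd_clone.usable.  */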
2635 static bool
2636 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2637 gimple *vec_stmt, slp_tree slp_node)
2639 tree vec_dest;
2640 tree scalar_dest;
2641 tree op, type;
2642 tree vec_oprnd0 = NULL_TREE;
2643 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2644 tree vectype;
2645 unsigned int nunits;
2646 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2647 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2648 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2649 tree fndecl, new_temp, def;
2650 gimple def_stmt;
2651 gimple new_stmt = NULL;
2652 int ncopies, j;
2653 vec<simd_call_arg_info> arginfo = vNULL;
2654 vec<tree> vargs = vNULL;
2655 size_t i, nargs;
2656 tree lhs, rtype, ratype;
2657 vec<constructor_elt, va_gc> *ret_ctor_elts;
2659 /* Is STMT a vectorizable call? */
2660 if (!is_gimple_call (stmt))
2661 return false;
2663 fndecl = gimple_call_fndecl (stmt);
2664 if (fndecl == NULL_TREE)
2665 return false;
2667 struct cgraph_node *node = cgraph_node::get (fndecl);
2668 if (node == NULL || node->simd_clones == NULL)
2669 return false;
2671 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2672 return false;
2674 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2675 return false;
2677 if (gimple_call_lhs (stmt)
2678 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2679 return false;
2681 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2683 vectype = STMT_VINFO_VECTYPE (stmt_info);
2685 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2686 return false;
2688 /* FORNOW */
2689 if (slp_node || PURE_SLP_STMT (stmt_info))
2690 return false;
2692 /* Process function arguments. */
2693 nargs = gimple_call_num_args (stmt);
2695 /* Bail out if the function has zero arguments. */
2696 if (nargs == 0)
2697 return false;
2699 arginfo.create (nargs);
2701 for (i = 0; i < nargs; i++)
2703 simd_call_arg_info thisarginfo;
2704 affine_iv iv;
2706 thisarginfo.linear_step = 0;
2707 thisarginfo.align = 0;
2708 thisarginfo.op = NULL_TREE;
2710 op = gimple_call_arg (stmt, i);
2711 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2712 &def_stmt, &def, &thisarginfo.dt,
2713 &thisarginfo.vectype)
2714 || thisarginfo.dt == vect_uninitialized_def)
2716 if (dump_enabled_p ())
2717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2718 "use not simple.\n");
2719 arginfo.release ();
2720 return false;
2723 if (thisarginfo.dt == vect_constant_def
2724 || thisarginfo.dt == vect_external_def)
2725 gcc_assert (thisarginfo.vectype == NULL_TREE);
2726 else
2727 gcc_assert (thisarginfo.vectype != NULL_TREE);
2729 /* For linear arguments, the analyze phase should have saved
2730 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2731 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2732 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2734 gcc_assert (vec_stmt);
2735 thisarginfo.linear_step
2736 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2737 thisarginfo.op
2738 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2739 /* If loop has been peeled for alignment, we need to adjust it. */
2740 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2741 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2742 if (n1 != n2)
2744 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2745 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2746 tree opt = TREE_TYPE (thisarginfo.op);
2747 bias = fold_convert (TREE_TYPE (step), bias);
2748 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2749 thisarginfo.op
2750 = fold_build2 (POINTER_TYPE_P (opt)
2751 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2752 thisarginfo.op, bias);
2755 else if (!vec_stmt
2756 && thisarginfo.dt != vect_constant_def
2757 && thisarginfo.dt != vect_external_def
2758 && loop_vinfo
2759 && TREE_CODE (op) == SSA_NAME
2760 && simple_iv (loop, loop_containing_stmt (stmt), op,
2761 &iv, false)
2762 && tree_fits_shwi_p (iv.step))
2764 thisarginfo.linear_step = tree_to_shwi (iv.step);
2765 thisarginfo.op = iv.base;
2767 else if ((thisarginfo.dt == vect_constant_def
2768 || thisarginfo.dt == vect_external_def)
2769 && POINTER_TYPE_P (TREE_TYPE (op)))
2770 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2772 arginfo.quick_push (thisarginfo);
2775 unsigned int badness = 0;
2776 struct cgraph_node *bestn = NULL;
2777 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2778 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2779 else
2780 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2781 n = n->simdclone->next_clone)
2783 unsigned int this_badness = 0;
2784 if (n->simdclone->simdlen
2785 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2786 || n->simdclone->nargs != nargs)
2787 continue;
2788 if (n->simdclone->simdlen
2789 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2790 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2791 - exact_log2 (n->simdclone->simdlen)) * 1024;
2792 if (n->simdclone->inbranch)
2793 this_badness += 2048;
2794 int target_badness = targetm.simd_clone.usable (n);
2795 if (target_badness < 0)
2796 continue;
2797 this_badness += target_badness * 512;
2798 /* FORNOW: Have to add code to add the mask argument. */
2799 if (n->simdclone->inbranch)
2800 continue;
2801 for (i = 0; i < nargs; i++)
2803 switch (n->simdclone->args[i].arg_type)
2805 case SIMD_CLONE_ARG_TYPE_VECTOR:
2806 if (!useless_type_conversion_p
2807 (n->simdclone->args[i].orig_type,
2808 TREE_TYPE (gimple_call_arg (stmt, i))))
2809 i = -1;
2810 else if (arginfo[i].dt == vect_constant_def
2811 || arginfo[i].dt == vect_external_def
2812 || arginfo[i].linear_step)
2813 this_badness += 64;
2814 break;
2815 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2816 if (arginfo[i].dt != vect_constant_def
2817 && arginfo[i].dt != vect_external_def)
2818 i = -1;
2819 break;
2820 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2821 if (arginfo[i].dt == vect_constant_def
2822 || arginfo[i].dt == vect_external_def
2823 || (arginfo[i].linear_step
2824 != n->simdclone->args[i].linear_step))
2825 i = -1;
2826 break;
2827 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2828 /* FORNOW */
2829 i = -1;
2830 break;
2831 case SIMD_CLONE_ARG_TYPE_MASK:
2832 gcc_unreachable ();
2834 if (i == (size_t) -1)
2835 break;
2836 if (n->simdclone->args[i].alignment > arginfo[i].align)
2838 i = -1;
2839 break;
2841 if (arginfo[i].align)
2842 this_badness += (exact_log2 (arginfo[i].align)
2843 - exact_log2 (n->simdclone->args[i].alignment));
2845 if (i == (size_t) -1)
2846 continue;
2847 if (bestn == NULL || this_badness < badness)
2849 bestn = n;
2850 badness = this_badness;
2854 if (bestn == NULL)
2856 arginfo.release ();
2857 return false;
2860 for (i = 0; i < nargs; i++)
2861 if ((arginfo[i].dt == vect_constant_def
2862 || arginfo[i].dt == vect_external_def)
2863 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2865 arginfo[i].vectype
2866 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2867 i)));
2868 if (arginfo[i].vectype == NULL
2869 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2870 > bestn->simdclone->simdlen))
2872 arginfo.release ();
2873 return false;
2877 fndecl = bestn->decl;
2878 nunits = bestn->simdclone->simdlen;
2879 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2881 /* If the function isn't const, only allow it in simd loops where the user
2882 has asserted that at least nunits consecutive iterations can be
2883 performed using SIMD instructions. */
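/* For example (illustrative): a loop annotated with

    #pragma omp simd safelen(8)

sets loop->safelen to 8, asserting that 8 consecutive iterations may
execute concurrently, which is what makes calling a non-const clone
with simdlen <= 8 acceptable here.  */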
2884 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2885 && gimple_vuse (stmt))
2887 arginfo.release ();
2888 return false;
2891 /* Sanity check: make sure that at least one copy of the vectorized stmt
2892 needs to be generated. */
2893 gcc_assert (ncopies >= 1);
2895 if (!vec_stmt) /* transformation not required. */
2897 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2898 for (i = 0; i < nargs; i++)
2899 if (bestn->simdclone->args[i].arg_type
2900 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2902 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2903 + 1);
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2905 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2906 ? size_type_node : TREE_TYPE (arginfo[i].op);
2907 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2908 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2910 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2911 if (dump_enabled_p ())
2912 dump_printf_loc (MSG_NOTE, vect_location,
2913 "=== vectorizable_simd_clone_call ===\n");
2914 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2915 arginfo.release ();
2916 return true;
2919 /** Transform. **/
2921 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2924 /* Handle def. */
2925 scalar_dest = gimple_call_lhs (stmt);
2926 vec_dest = NULL_TREE;
2927 rtype = NULL_TREE;
2928 ratype = NULL_TREE;
2929 if (scalar_dest)
2931 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2932 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2933 if (TREE_CODE (rtype) == ARRAY_TYPE)
2935 ratype = rtype;
2936 rtype = TREE_TYPE (ratype);
2940 prev_stmt_info = NULL;
2941 for (j = 0; j < ncopies; ++j)
2943 /* Build argument list for the vectorized call. */
2944 if (j == 0)
2945 vargs.create (nargs);
2946 else
2947 vargs.truncate (0);
2949 for (i = 0; i < nargs; i++)
2951 unsigned int k, l, m, o;
2952 tree atype;
2953 op = gimple_call_arg (stmt, i);
2954 switch (bestn->simdclone->args[i].arg_type)
2956 case SIMD_CLONE_ARG_TYPE_VECTOR:
2957 atype = bestn->simdclone->args[i].vector_type;
2958 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2959 for (m = j * o; m < (j + 1) * o; m++)
2961 if (TYPE_VECTOR_SUBPARTS (atype)
2962 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2964 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2965 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2966 / TYPE_VECTOR_SUBPARTS (atype));
2967 gcc_assert ((k & (k - 1)) == 0);
2968 if (m == 0)
2969 vec_oprnd0
2970 = vect_get_vec_def_for_operand (op, stmt, NULL);
2971 else
2973 vec_oprnd0 = arginfo[i].op;
2974 if ((m & (k - 1)) == 0)
2975 vec_oprnd0
2976 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2977 vec_oprnd0);
2979 arginfo[i].op = vec_oprnd0;
2980 vec_oprnd0
2981 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2982 size_int (prec),
2983 bitsize_int ((m & (k - 1)) * prec));
2984 new_stmt
2985 = gimple_build_assign (make_ssa_name (atype),
2986 vec_oprnd0);
2987 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2988 vargs.safe_push (gimple_assign_lhs (new_stmt));
2990 else
2992 k = (TYPE_VECTOR_SUBPARTS (atype)
2993 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2994 gcc_assert ((k & (k - 1)) == 0);
2995 vec<constructor_elt, va_gc> *ctor_elts;
2996 if (k != 1)
2997 vec_alloc (ctor_elts, k);
2998 else
2999 ctor_elts = NULL;
3000 for (l = 0; l < k; l++)
3002 if (m == 0 && l == 0)
3003 vec_oprnd0
3004 = vect_get_vec_def_for_operand (op, stmt, NULL);
3005 else
3006 vec_oprnd0
3007 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3008 arginfo[i].op);
3009 arginfo[i].op = vec_oprnd0;
3010 if (k == 1)
3011 break;
3012 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3013 vec_oprnd0);
3015 if (k == 1)
3016 vargs.safe_push (vec_oprnd0);
3017 else
3019 vec_oprnd0 = build_constructor (atype, ctor_elts);
3020 new_stmt
3021 = gimple_build_assign (make_ssa_name (atype),
3022 vec_oprnd0);
3023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3024 vargs.safe_push (gimple_assign_lhs (new_stmt));
3028 break;
3029 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3030 vargs.safe_push (op);
3031 break;
3032 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3033 if (j == 0)
3035 gimple_seq stmts;
3036 arginfo[i].op
3037 = force_gimple_operand (arginfo[i].op, &stmts, true,
3038 NULL_TREE);
3039 if (stmts != NULL)
3041 basic_block new_bb;
3042 edge pe = loop_preheader_edge (loop);
3043 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3044 gcc_assert (!new_bb);
3046 tree phi_res = copy_ssa_name (op);
3047 gphi *new_phi = create_phi_node (phi_res, loop->header);
3048 set_vinfo_for_stmt (new_phi,
3049 new_stmt_vec_info (new_phi, loop_vinfo,
3050 NULL));
3051 add_phi_arg (new_phi, arginfo[i].op,
3052 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3053 enum tree_code code
3054 = POINTER_TYPE_P (TREE_TYPE (op))
3055 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3056 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3057 ? sizetype : TREE_TYPE (op);
3058 widest_int cst
3059 = wi::mul (bestn->simdclone->args[i].linear_step,
3060 ncopies * nunits);
3061 tree tcst = wide_int_to_tree (type, cst);
3062 tree phi_arg = copy_ssa_name (op);
3063 new_stmt
3064 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3065 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3066 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3067 set_vinfo_for_stmt (new_stmt,
3068 new_stmt_vec_info (new_stmt, loop_vinfo,
3069 NULL));
3070 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3071 UNKNOWN_LOCATION);
3072 arginfo[i].op = phi_res;
3073 vargs.safe_push (phi_res);
3075 else
3077 enum tree_code code
3078 = POINTER_TYPE_P (TREE_TYPE (op))
3079 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3080 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3081 ? sizetype : TREE_TYPE (op);
3082 widest_int cst
3083 = wi::mul (bestn->simdclone->args[i].linear_step,
3084 j * nunits);
3085 tree tcst = wide_int_to_tree (type, cst);
3086 new_temp = make_ssa_name (TREE_TYPE (op));
3087 new_stmt = gimple_build_assign (new_temp, code,
3088 arginfo[i].op, tcst);
3089 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3090 vargs.safe_push (new_temp);
3092 break;
3093 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3094 default:
3095 gcc_unreachable ();
3099 new_stmt = gimple_build_call_vec (fndecl, vargs);
3100 if (vec_dest)
3102 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3103 if (ratype)
3104 new_temp = create_tmp_var (ratype);
3105 else if (TYPE_VECTOR_SUBPARTS (vectype)
3106 == TYPE_VECTOR_SUBPARTS (rtype))
3107 new_temp = make_ssa_name (vec_dest, new_stmt);
3108 else
3109 new_temp = make_ssa_name (rtype, new_stmt);
3110 gimple_call_set_lhs (new_stmt, new_temp);
3112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3114 if (vec_dest)
3116 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3118 unsigned int k, l;
3119 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3120 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3121 gcc_assert ((k & (k - 1)) == 0);
3122 for (l = 0; l < k; l++)
3124 tree t;
3125 if (ratype)
3127 t = build_fold_addr_expr (new_temp);
3128 t = build2 (MEM_REF, vectype, t,
3129 build_int_cst (TREE_TYPE (t),
3130 l * prec / BITS_PER_UNIT));
3132 else
3133 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3134 size_int (prec), bitsize_int (l * prec));
3135 new_stmt
3136 = gimple_build_assign (make_ssa_name (vectype), t);
3137 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3138 if (j == 0 && l == 0)
3139 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3140 else
3141 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3143 prev_stmt_info = vinfo_for_stmt (new_stmt);
3146 if (ratype)
3148 tree clobber = build_constructor (ratype, NULL);
3149 TREE_THIS_VOLATILE (clobber) = 1;
3150 new_stmt = gimple_build_assign (new_temp, clobber);
3151 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3153 continue;
3155 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3157 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3158 / TYPE_VECTOR_SUBPARTS (rtype));
3159 gcc_assert ((k & (k - 1)) == 0);
3160 if ((j & (k - 1)) == 0)
3161 vec_alloc (ret_ctor_elts, k);
3162 if (ratype)
3164 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3165 for (m = 0; m < o; m++)
3167 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3168 size_int (m), NULL_TREE, NULL_TREE);
3169 new_stmt
3170 = gimple_build_assign (make_ssa_name (rtype), tem);
3171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3172 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3173 gimple_assign_lhs (new_stmt));
3175 tree clobber = build_constructor (ratype, NULL);
3176 TREE_THIS_VOLATILE (clobber) = 1;
3177 new_stmt = gimple_build_assign (new_temp, clobber);
3178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3180 else
3181 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3182 if ((j & (k - 1)) != k - 1)
3183 continue;
3184 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3185 new_stmt
3186 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3189 if ((unsigned) j == k - 1)
3190 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3191 else
3192 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3194 prev_stmt_info = vinfo_for_stmt (new_stmt);
3195 continue;
3197 else if (ratype)
3199 tree t = build_fold_addr_expr (new_temp);
3200 t = build2 (MEM_REF, vectype, t,
3201 build_int_cst (TREE_TYPE (t), 0));
3202 new_stmt
3203 = gimple_build_assign (make_ssa_name (vec_dest), t);
3204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3205 tree clobber = build_constructor (ratype, NULL);
3206 TREE_THIS_VOLATILE (clobber) = 1;
3207 vect_finish_stmt_generation (stmt,
3208 gimple_build_assign (new_temp,
3209 clobber), gsi);
3213 if (j == 0)
3214 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3215 else
3216 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3218 prev_stmt_info = vinfo_for_stmt (new_stmt);
3221 vargs.release ();
3223 /* The call in STMT might prevent it from being removed in dce.
3224 We however cannot remove it here, due to the way the ssa name
3225 it defines is mapped to the new definition. So just replace
3226 rhs of the statement with something harmless. */
3228 if (slp_node)
3229 return true;
3231 if (scalar_dest)
3233 type = TREE_TYPE (scalar_dest);
3234 if (is_pattern_stmt_p (stmt_info))
3235 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3236 else
3237 lhs = gimple_call_lhs (stmt);
3238 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3240 else
3241 new_stmt = gimple_build_nop ();
3242 set_vinfo_for_stmt (new_stmt, stmt_info);
3243 set_vinfo_for_stmt (stmt, NULL);
3244 STMT_VINFO_STMT (stmt_info) = new_stmt;
3245 gsi_replace (gsi, new_stmt, true);
3246 unlink_stmt_vdef (stmt);
3248 return true;
3252 /* Function vect_gen_widened_results_half
3254 Create a vector stmt whose code, number of arguments, and result
3255 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3256 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3257 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3258 needs to be created (DECL is a function-decl of a target-builtin).
3259 STMT is the original scalar stmt that we are vectorizing. */
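/* A sketch with invented operand names: widening a multiplication of
two V8HI vectors into V4SI products is emitted as two halves,

    vect_lo.5 = VEC_WIDEN_MULT_LO_EXPR <vect_a.3, vect_b.4>;
    vect_hi.6 = VEC_WIDEN_MULT_HI_EXPR <vect_a.3, vect_b.4>;

this function builds one such half per call, or the corresponding
target builtin call when CODE is CALL_EXPR.  */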
3261 static gimple
3262 vect_gen_widened_results_half (enum tree_code code,
3263 tree decl,
3264 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3265 tree vec_dest, gimple_stmt_iterator *gsi,
3266 gimple stmt)
3268 gimple new_stmt;
3269 tree new_temp;
3271 /* Generate half of the widened result: */
3272 if (code == CALL_EXPR)
3274 /* Target specific support */
3275 if (op_type == binary_op)
3276 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3277 else
3278 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3279 new_temp = make_ssa_name (vec_dest, new_stmt);
3280 gimple_call_set_lhs (new_stmt, new_temp);
3282 else
3284 /* Generic support */
3285 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3286 if (op_type != binary_op)
3287 vec_oprnd1 = NULL;
3288 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3289 new_temp = make_ssa_name (vec_dest, new_stmt);
3290 gimple_assign_set_lhs (new_stmt, new_temp);
3292 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3294 return new_stmt;
3298 /* Get vectorized definitions for loop-based vectorization. For the first
3299 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3300 scalar operand), and for the rest we get a copy with
3301 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3302 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3303 The vectors are collected into VEC_OPRNDS. */
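/* For example (invented names), with MULTI_STEP_CVT == 1 this collects
four vector defs of the same scalar operand: the initial vector def
and three successive stmt copies, roughly

    vect_x.5, vect_x.6, vect_x.7, vect_x.8

which the demotion code then consumes pairwise.  */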
3305 static void
3306 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3307 vec<tree> *vec_oprnds, int multi_step_cvt)
3309 tree vec_oprnd;
3311 /* Get first vector operand. */
3312 /* All the vector operands except the very first one (that is scalar oprnd)
3313 are stmt copies. */
3314 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3315 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3316 else
3317 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3319 vec_oprnds->quick_push (vec_oprnd);
3321 /* Get second vector operand. */
3322 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3323 vec_oprnds->quick_push (vec_oprnd);
3325 *oprnd = vec_oprnd;
3327 /* For conversion in multiple steps, continue to get operands
3328 recursively. */
3329 if (multi_step_cvt)
3330 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3334 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3335 For multi-step conversions store the resulting vectors and call the function
3336 recursively. */
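/* A sketch with invented names: narrowing two V4SI operands into one
V8HI result is done pairwise,

    vect_res.9 = VEC_PACK_TRUNC_EXPR <vect_x.5, vect_x.6>;

and for a multi-step conversion the packed results are stored back
into VEC_OPRNDS and fed to the next, narrower step.  */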
3338 static void
3339 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3340 int multi_step_cvt, gimple stmt,
3341 vec<tree> vec_dsts,
3342 gimple_stmt_iterator *gsi,
3343 slp_tree slp_node, enum tree_code code,
3344 stmt_vec_info *prev_stmt_info)
3346 unsigned int i;
3347 tree vop0, vop1, new_tmp, vec_dest;
3348 gimple new_stmt;
3349 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3351 vec_dest = vec_dsts.pop ();
3353 for (i = 0; i < vec_oprnds->length (); i += 2)
3355 /* Create demotion operation. */
3356 vop0 = (*vec_oprnds)[i];
3357 vop1 = (*vec_oprnds)[i + 1];
3358 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3359 new_tmp = make_ssa_name (vec_dest, new_stmt);
3360 gimple_assign_set_lhs (new_stmt, new_tmp);
3361 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3363 if (multi_step_cvt)
3364 /* Store the resulting vector for next recursive call. */
3365 (*vec_oprnds)[i/2] = new_tmp;
3366 else
3368 /* This is the last step of the conversion sequence. Store the
3369 vectors in SLP_NODE or in the vector info of the scalar statement
3370 (or in the STMT_VINFO_RELATED_STMT chain). */
3371 if (slp_node)
3372 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3374 if (!*prev_stmt_info)
3375 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3376 else
3377 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3379 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3383 /* For multi-step demotion operations we first generate demotion operations
3384 from the source type to the intermediate types, and then combine the
3385 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3386 type. */
3387 if (multi_step_cvt)
3389 /* At each level of recursion we have half of the operands we had at the
3390 previous level. */
3391 vec_oprnds->truncate ((i+1)/2);
3392 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3393 stmt, vec_dsts, gsi, slp_node,
3394 VEC_PACK_TRUNC_EXPR,
3395 prev_stmt_info);
3398 vec_dsts.quick_push (vec_dest);
3402 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3403 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3404 the resulting vectors and call the function recursively. */
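/* A sketch with invented names: widening one V8HI operand into two
V4SI results produces a low and a high half,

    vect_lo.5 = VEC_UNPACK_LO_EXPR <vect_x.4>;
    vect_hi.6 = VEC_UNPACK_HI_EXPR <vect_x.4>;

both halves are pushed onto VEC_OPRNDS0 so that a multi-step
conversion can widen them again in the next step.  */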
3406 static void
3407 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3408 vec<tree> *vec_oprnds1,
3409 gimple stmt, tree vec_dest,
3410 gimple_stmt_iterator *gsi,
3411 enum tree_code code1,
3412 enum tree_code code2, tree decl1,
3413 tree decl2, int op_type)
3415 int i;
3416 tree vop0, vop1, new_tmp1, new_tmp2;
3417 gimple new_stmt1, new_stmt2;
3418 vec<tree> vec_tmp = vNULL;
3420 vec_tmp.create (vec_oprnds0->length () * 2);
3421 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3423 if (op_type == binary_op)
3424 vop1 = (*vec_oprnds1)[i];
3425 else
3426 vop1 = NULL_TREE;
3428 /* Generate the two halves of promotion operation. */
3429 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3430 op_type, vec_dest, gsi, stmt);
3431 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3432 op_type, vec_dest, gsi, stmt);
3433 if (is_gimple_call (new_stmt1))
3435 new_tmp1 = gimple_call_lhs (new_stmt1);
3436 new_tmp2 = gimple_call_lhs (new_stmt2);
3438 else
3440 new_tmp1 = gimple_assign_lhs (new_stmt1);
3441 new_tmp2 = gimple_assign_lhs (new_stmt2);
3444 /* Store the results for the next step. */
3445 vec_tmp.quick_push (new_tmp1);
3446 vec_tmp.quick_push (new_tmp2);
3449 vec_oprnds0->release ();
3450 *vec_oprnds0 = vec_tmp;
3454 /* Check if STMT performs a conversion operation that can be vectorized.
3455 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3456 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3457 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
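/* A sketch for illustration (types chosen arbitrarily): converting int
to double with V4SI inputs and V2DF outputs is a WIDEN conversion;
each V4SI def yields two V2DF results, e.g. by converting the low and
high halves separately.  Converting double to int the other way round
is a NARROW conversion and packs two defs into one result, while
same-size conversions (NONE) map onto a single statement per copy.  */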
3459 static bool
3460 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3461 gimple *vec_stmt, slp_tree slp_node)
3463 tree vec_dest;
3464 tree scalar_dest;
3465 tree op0, op1 = NULL_TREE;
3466 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3467 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3468 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3469 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3470 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3471 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3472 tree new_temp;
3473 tree def;
3474 gimple def_stmt;
3475 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3476 gimple new_stmt = NULL;
3477 stmt_vec_info prev_stmt_info;
3478 int nunits_in;
3479 int nunits_out;
3480 tree vectype_out, vectype_in;
3481 int ncopies, i, j;
3482 tree lhs_type, rhs_type;
3483 enum { NARROW, NONE, WIDEN } modifier;
3484 vec<tree> vec_oprnds0 = vNULL;
3485 vec<tree> vec_oprnds1 = vNULL;
3486 tree vop0;
3487 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3488 int multi_step_cvt = 0;
3489 vec<tree> vec_dsts = vNULL;
3490 vec<tree> interm_types = vNULL;
3491 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3492 int op_type;
3493 machine_mode rhs_mode;
3494 unsigned short fltsz;
3496 /* Is STMT a vectorizable conversion? */
3498 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3499 return false;
3501 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3502 return false;
3504 if (!is_gimple_assign (stmt))
3505 return false;
3507 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3508 return false;
3510 code = gimple_assign_rhs_code (stmt);
3511 if (!CONVERT_EXPR_CODE_P (code)
3512 && code != FIX_TRUNC_EXPR
3513 && code != FLOAT_EXPR
3514 && code != WIDEN_MULT_EXPR
3515 && code != WIDEN_LSHIFT_EXPR)
3516 return false;
3518 op_type = TREE_CODE_LENGTH (code);
3520 /* Check types of lhs and rhs. */
3521 scalar_dest = gimple_assign_lhs (stmt);
3522 lhs_type = TREE_TYPE (scalar_dest);
3523 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3525 op0 = gimple_assign_rhs1 (stmt);
3526 rhs_type = TREE_TYPE (op0);
3528 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3529 && !((INTEGRAL_TYPE_P (lhs_type)
3530 && INTEGRAL_TYPE_P (rhs_type))
3531 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3532 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3533 return false;
3535 if ((INTEGRAL_TYPE_P (lhs_type)
3536 && (TYPE_PRECISION (lhs_type)
3537 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3538 || (INTEGRAL_TYPE_P (rhs_type)
3539 && (TYPE_PRECISION (rhs_type)
3540 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3542 if (dump_enabled_p ())
3543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3544 "type conversion to/from bit-precision unsupported."
3545 "\n");
3546 return false;
3549 /* Check the operands of the operation. */
3550 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3551 &def_stmt, &def, &dt[0], &vectype_in))
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3555 "use not simple.\n");
3556 return false;
3558 if (op_type == binary_op)
3560 bool ok;
3562 op1 = gimple_assign_rhs2 (stmt);
3563 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3564 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3565 OP1. */
3566 if (CONSTANT_CLASS_P (op0))
3567 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3568 &def_stmt, &def, &dt[1], &vectype_in);
3569 else
3570 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3571 &def, &dt[1]);
3573 if (!ok)
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3577 "use not simple.\n");
3578 return false;
3582 /* If op0 is an external or constant def, use a vector type of
3583 the same size as the output vector type. */
3584 if (!vectype_in)
3585 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3586 if (vec_stmt)
3587 gcc_assert (vectype_in);
3588 if (!vectype_in)
3590 if (dump_enabled_p ())
3592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3593 "no vectype for scalar type ");
3594 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3595 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3598 return false;
3601 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3602 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3603 if (nunits_in < nunits_out)
3604 modifier = NARROW;
3605 else if (nunits_out == nunits_in)
3606 modifier = NONE;
3607 else
3608 modifier = WIDEN;
3610 /* Multiple types in SLP are handled by creating the appropriate number of
3611 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3612 case of SLP. */
3613 if (slp_node || PURE_SLP_STMT (stmt_info))
3614 ncopies = 1;
3615 else if (modifier == NARROW)
3616 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3617 else
3618 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3620 /* Sanity check: make sure that at least one copy of the vectorized stmt
3621 needs to be generated. */
3622 gcc_assert (ncopies >= 1);
3624 /* Supportable by target? */
3625 switch (modifier)
3627 case NONE:
3628 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3629 return false;
3630 if (supportable_convert_operation (code, vectype_out, vectype_in,
3631 &decl1, &code1))
3632 break;
3633 /* FALLTHRU */
3634 unsupported:
3635 if (dump_enabled_p ())
3636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3637 "conversion not supported by target.\n");
3638 return false;
3640 case WIDEN:
3641 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3642 &code1, &code2, &multi_step_cvt,
3643 &interm_types))
3645 /* Binary widening operation can only be supported directly by the
3646 architecture. */
3647 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3648 break;
3651 if (code != FLOAT_EXPR
3652 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3653 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3654 goto unsupported;
3656 rhs_mode = TYPE_MODE (rhs_type);
3657 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3658 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3659 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3660 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3662 cvt_type
3663 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3664 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3665 if (cvt_type == NULL_TREE)
3666 goto unsupported;
3668 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3670 if (!supportable_convert_operation (code, vectype_out,
3671 cvt_type, &decl1, &codecvt1))
3672 goto unsupported;
3674 else if (!supportable_widening_operation (code, stmt, vectype_out,
3675 cvt_type, &codecvt1,
3676 &codecvt2, &multi_step_cvt,
3677 &interm_types))
3678 continue;
3679 else
3680 gcc_assert (multi_step_cvt == 0);
3682 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3683 vectype_in, &code1, &code2,
3684 &multi_step_cvt, &interm_types))
3685 break;
3688 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3689 goto unsupported;
3691 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3692 codecvt2 = ERROR_MARK;
3693 else
3695 multi_step_cvt++;
3696 interm_types.safe_push (cvt_type);
3697 cvt_type = NULL_TREE;
3699 break;
3701 case NARROW:
3702 gcc_assert (op_type == unary_op);
3703 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3704 &code1, &multi_step_cvt,
3705 &interm_types))
3706 break;
3708 if (code != FIX_TRUNC_EXPR
3709 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3710 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3711 goto unsupported;
3713 rhs_mode = TYPE_MODE (rhs_type);
3714 cvt_type
3715 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3716 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3717 if (cvt_type == NULL_TREE)
3718 goto unsupported;
3719 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3720 &decl1, &codecvt1))
3721 goto unsupported;
3722 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3723 &code1, &multi_step_cvt,
3724 &interm_types))
3725 break;
3726 goto unsupported;
3728 default:
3729 gcc_unreachable ();
3732 if (!vec_stmt) /* transformation not required. */
3734 if (dump_enabled_p ())
3735 dump_printf_loc (MSG_NOTE, vect_location,
3736 "=== vectorizable_conversion ===\n");
3737 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3739 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3740 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3742 else if (modifier == NARROW)
3744 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3745 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3747 else
3749 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3750 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3752 interm_types.release ();
3753 return true;
3756 /** Transform. **/
3757 if (dump_enabled_p ())
3758 dump_printf_loc (MSG_NOTE, vect_location,
3759 "transform conversion. ncopies = %d.\n", ncopies);
3761 if (op_type == binary_op)
3763 if (CONSTANT_CLASS_P (op0))
3764 op0 = fold_convert (TREE_TYPE (op1), op0);
3765 else if (CONSTANT_CLASS_P (op1))
3766 op1 = fold_convert (TREE_TYPE (op0), op1);
3769 /* In case of multi-step conversion, we first generate conversion operations
3770 to the intermediate types, and then from those types to the final one.
3771 We create vector destinations for the intermediate types (TYPES) received
3772 from supportable_*_operation, and store them in the correct order
3773 for future use in vect_create_vectorized_*_stmts (). */
3774 vec_dsts.create (multi_step_cvt + 1);
3775 vec_dest = vect_create_destination_var (scalar_dest,
3776 (cvt_type && modifier == WIDEN)
3777 ? cvt_type : vectype_out);
3778 vec_dsts.quick_push (vec_dest);
3780 if (multi_step_cvt)
3782 for (i = interm_types.length () - 1;
3783 interm_types.iterate (i, &intermediate_type); i--)
3785 vec_dest = vect_create_destination_var (scalar_dest,
3786 intermediate_type);
3787 vec_dsts.quick_push (vec_dest);
3791 if (cvt_type)
3792 vec_dest = vect_create_destination_var (scalar_dest,
3793 modifier == WIDEN
3794 ? vectype_out : cvt_type);
3796 if (!slp_node)
3798 if (modifier == WIDEN)
3800 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3801 if (op_type == binary_op)
3802 vec_oprnds1.create (1);
3804 else if (modifier == NARROW)
3805 vec_oprnds0.create (
3806 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3808 else if (code == WIDEN_LSHIFT_EXPR)
3809 vec_oprnds1.create (slp_node->vec_stmts_size);
3811 last_oprnd = op0;
3812 prev_stmt_info = NULL;
3813 switch (modifier)
3815 case NONE:
3816 for (j = 0; j < ncopies; j++)
3818 if (j == 0)
3819 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3820 -1);
3821 else
3822 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3824 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3826 /* Arguments are ready, create the new vector stmt. */
3827 if (code1 == CALL_EXPR)
3829 new_stmt = gimple_build_call (decl1, 1, vop0);
3830 new_temp = make_ssa_name (vec_dest, new_stmt);
3831 gimple_call_set_lhs (new_stmt, new_temp);
3833 else
3835 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3836 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3837 new_temp = make_ssa_name (vec_dest, new_stmt);
3838 gimple_assign_set_lhs (new_stmt, new_temp);
3841 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3842 if (slp_node)
3843 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3846 if (j == 0)
3847 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3848 else
3849 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3850 prev_stmt_info = vinfo_for_stmt (new_stmt);
3852 break;
3854 case WIDEN:
3855 /* In case the vectorization factor (VF) is bigger than the number
3856 of elements that we can fit in a vectype (nunits), we have to
3857 generate more than one vector stmt - i.e - we need to "unroll"
3858 the vector stmt by a factor VF/nunits. */
3859 for (j = 0; j < ncopies; j++)
3861 /* Handle uses. */
3862 if (j == 0)
3864 if (slp_node)
3866 if (code == WIDEN_LSHIFT_EXPR)
3868 unsigned int k;
3870 vec_oprnd1 = op1;
3871 /* Store vec_oprnd1 for every vector stmt to be created
3872 for SLP_NODE. We check during the analysis that all
3873 the shift arguments are the same. */
3874 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3875 vec_oprnds1.quick_push (vec_oprnd1);
3877 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3878 slp_node, -1);
3880 else
3881 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3882 &vec_oprnds1, slp_node, -1);
3884 else
3886 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3887 vec_oprnds0.quick_push (vec_oprnd0);
3888 if (op_type == binary_op)
3890 if (code == WIDEN_LSHIFT_EXPR)
3891 vec_oprnd1 = op1;
3892 else
3893 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3894 NULL);
3895 vec_oprnds1.quick_push (vec_oprnd1);
3899 else
3901 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3902 vec_oprnds0.truncate (0);
3903 vec_oprnds0.quick_push (vec_oprnd0);
3904 if (op_type == binary_op)
3906 if (code == WIDEN_LSHIFT_EXPR)
3907 vec_oprnd1 = op1;
3908 else
3909 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3910 vec_oprnd1);
3911 vec_oprnds1.truncate (0);
3912 vec_oprnds1.quick_push (vec_oprnd1);
3916 /* Arguments are ready. Create the new vector stmts. */
3917 for (i = multi_step_cvt; i >= 0; i--)
3919 tree this_dest = vec_dsts[i];
3920 enum tree_code c1 = code1, c2 = code2;
3921 if (i == 0 && codecvt2 != ERROR_MARK)
3923 c1 = codecvt1;
3924 c2 = codecvt2;
3926 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3927 &vec_oprnds1,
3928 stmt, this_dest, gsi,
3929 c1, c2, decl1, decl2,
3930 op_type);
3933 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3935 if (cvt_type)
3937 if (codecvt1 == CALL_EXPR)
3939 new_stmt = gimple_build_call (decl1, 1, vop0);
3940 new_temp = make_ssa_name (vec_dest, new_stmt);
3941 gimple_call_set_lhs (new_stmt, new_temp);
3943 else
3945 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3946 new_temp = make_ssa_name (vec_dest);
3947 new_stmt = gimple_build_assign (new_temp, codecvt1,
3948 vop0);
3951 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3953 else
3954 new_stmt = SSA_NAME_DEF_STMT (vop0);
3956 if (slp_node)
3957 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3959 if (!prev_stmt_info)
3960 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3961 else
3962 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3963 prev_stmt_info = vinfo_for_stmt (new_stmt);
3967 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3968 break;
3970 case NARROW:
3971 /* In case the vectorization factor (VF) is bigger than the number
3972 of elements that we can fit in a vectype (nunits), we have to
3973 generate more than one vector stmt - i.e - we need to "unroll"
3974 the vector stmt by a factor VF/nunits. */
3975 for (j = 0; j < ncopies; j++)
3977 /* Handle uses. */
3978 if (slp_node)
3979 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3980 slp_node, -1);
3981 else
3983 vec_oprnds0.truncate (0);
3984 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3985 vect_pow2 (multi_step_cvt) - 1);
3988 /* Arguments are ready. Create the new vector stmts. */
3989 if (cvt_type)
3990 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3992 if (codecvt1 == CALL_EXPR)
3994 new_stmt = gimple_build_call (decl1, 1, vop0);
3995 new_temp = make_ssa_name (vec_dest, new_stmt);
3996 gimple_call_set_lhs (new_stmt, new_temp);
3998 else
4000 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4001 new_temp = make_ssa_name (vec_dest);
4002 new_stmt = gimple_build_assign (new_temp, codecvt1,
4003 vop0);
4006 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4007 vec_oprnds0[i] = new_temp;
4010 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4011 stmt, vec_dsts, gsi,
4012 slp_node, code1,
4013 &prev_stmt_info);
4016 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4017 break;
4020 vec_oprnds0.release ();
4021 vec_oprnds1.release ();
4022 vec_dsts.release ();
4023 interm_types.release ();
4025 return true;
4029 /* Function vectorizable_assignment.
4031 Check if STMT performs an assignment (copy) that can be vectorized.
4032 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4033 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4034 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4036 static bool
4037 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4038 gimple *vec_stmt, slp_tree slp_node)
4040 tree vec_dest;
4041 tree scalar_dest;
4042 tree op;
4043 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4044 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4045 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4046 tree new_temp;
4047 tree def;
4048 gimple def_stmt;
4049 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4050 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4051 int ncopies;
4052 int i, j;
4053 vec<tree> vec_oprnds = vNULL;
4054 tree vop;
4055 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4056 gimple new_stmt = NULL;
4057 stmt_vec_info prev_stmt_info = NULL;
4058 enum tree_code code;
4059 tree vectype_in;
4061 /* Multiple types in SLP are handled by creating the appropriate number of
4062 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4063 case of SLP. */
4064 if (slp_node || PURE_SLP_STMT (stmt_info))
4065 ncopies = 1;
4066 else
4067 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4069 gcc_assert (ncopies >= 1);
4071 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4072 return false;
4074 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4075 return false;
4077 /* Is vectorizable assignment? */
4078 if (!is_gimple_assign (stmt))
4079 return false;
4081 scalar_dest = gimple_assign_lhs (stmt);
4082 if (TREE_CODE (scalar_dest) != SSA_NAME)
4083 return false;
4085 code = gimple_assign_rhs_code (stmt);
4086 if (gimple_assign_single_p (stmt)
4087 || code == PAREN_EXPR
4088 || CONVERT_EXPR_CODE_P (code))
4089 op = gimple_assign_rhs1 (stmt);
4090 else
4091 return false;
4093 if (code == VIEW_CONVERT_EXPR)
4094 op = TREE_OPERAND (op, 0);
4096 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4097 &def_stmt, &def, &dt[0], &vectype_in))
4099 if (dump_enabled_p ())
4100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4101 "use not simple.\n");
4102 return false;
4105 /* We can handle NOP_EXPR conversions that do not change the number
4106 of elements or the vector size. */
4107 if ((CONVERT_EXPR_CODE_P (code)
4108 || code == VIEW_CONVERT_EXPR)
4109 && (!vectype_in
4110 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4111 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4112 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4113 return false;
4115 /* We do not handle bit-precision changes. */
4116 if ((CONVERT_EXPR_CODE_P (code)
4117 || code == VIEW_CONVERT_EXPR)
4118 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4119 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4120 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4121 || ((TYPE_PRECISION (TREE_TYPE (op))
4122 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4123 /* But a conversion that does not change the bit-pattern is ok. */
4124 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4125 > TYPE_PRECISION (TREE_TYPE (op)))
4126 && TYPE_UNSIGNED (TREE_TYPE (op))))
4128 if (dump_enabled_p ())
4129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4130 "type conversion to/from bit-precision "
4131 "unsupported.\n");
4132 return false;
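/* For example (illustrative only, not an exhaustive list): a conversion
   from signed char to a 5-bit bit-field type is rejected here because the
   result would need an extra truncation to 5 bits, while a conversion from
   an unsigned 5-bit bit-field type to unsigned char is fine because the
   bit pattern of the value does not change.  */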
4135 if (!vec_stmt) /* transformation not required. */
4137 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4138 if (dump_enabled_p ())
4139 dump_printf_loc (MSG_NOTE, vect_location,
4140 "=== vectorizable_assignment ===\n");
4141 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4142 return true;
4145 /** Transform. **/
4146 if (dump_enabled_p ())
4147 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4149 /* Handle def. */
4150 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4152 /* Handle use. */
4153 for (j = 0; j < ncopies; j++)
4155 /* Handle uses. */
4156 if (j == 0)
4157 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4158 else
4159 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4161 /* Arguments are ready. Create the new vector stmt. */
4162 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4164 if (CONVERT_EXPR_CODE_P (code)
4165 || code == VIEW_CONVERT_EXPR)
4166 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4167 new_stmt = gimple_build_assign (vec_dest, vop);
4168 new_temp = make_ssa_name (vec_dest, new_stmt);
4169 gimple_assign_set_lhs (new_stmt, new_temp);
4170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4171 if (slp_node)
4172 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4175 if (slp_node)
4176 continue;
4178 if (j == 0)
4179 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4180 else
4181 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4183 prev_stmt_info = vinfo_for_stmt (new_stmt);
4186 vec_oprnds.release ();
4187 return true;
4191 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4192 either as shift by a scalar or by a vector. */
4194 bool
4195 vect_supportable_shift (enum tree_code code, tree scalar_type)
4198 machine_mode vec_mode;
4199 optab optab;
4200 int icode;
4201 tree vectype;
4203 vectype = get_vectype_for_scalar_type (scalar_type);
4204 if (!vectype)
4205 return false;
4207 optab = optab_for_tree_code (code, vectype, optab_scalar);
4208 if (!optab
4209 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4211 optab = optab_for_tree_code (code, vectype, optab_vector);
4212 if (!optab
4213 || (optab_handler (optab, TYPE_MODE (vectype))
4214 == CODE_FOR_nothing))
4215 return false;
4218 vec_mode = TYPE_MODE (vectype);
4219 icode = (int) optab_handler (optab, vec_mode);
4220 if (icode == CODE_FOR_nothing)
4221 return false;
4223 return true;
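/* A caller interested in whether shifts of int by either a loop-invariant
   or a per-element amount can be vectorized at all might use the function
   like this (illustrative sketch only):

     if (vect_supportable_shift (LSHIFT_EXPR, integer_type_node))
       ... generate a pattern that relies on a vector shift ...  */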
4227 /* Function vectorizable_shift.
4229 Check if STMT performs a shift operation that can be vectorized.
4230 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4231 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4232 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4234 static bool
4235 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4236 gimple *vec_stmt, slp_tree slp_node)
4238 tree vec_dest;
4239 tree scalar_dest;
4240 tree op0, op1 = NULL;
4241 tree vec_oprnd1 = NULL_TREE;
4242 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4243 tree vectype;
4244 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4245 enum tree_code code;
4246 machine_mode vec_mode;
4247 tree new_temp;
4248 optab optab;
4249 int icode;
4250 machine_mode optab_op2_mode;
4251 tree def;
4252 gimple def_stmt;
4253 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4254 gimple new_stmt = NULL;
4255 stmt_vec_info prev_stmt_info;
4256 int nunits_in;
4257 int nunits_out;
4258 tree vectype_out;
4259 tree op1_vectype;
4260 int ncopies;
4261 int j, i;
4262 vec<tree> vec_oprnds0 = vNULL;
4263 vec<tree> vec_oprnds1 = vNULL;
4264 tree vop0, vop1;
4265 unsigned int k;
4266 bool scalar_shift_arg = true;
4267 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4268 int vf;
4270 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4271 return false;
4273 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4274 return false;
4276 /* Is STMT a vectorizable binary/unary operation? */
4277 if (!is_gimple_assign (stmt))
4278 return false;
4280 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4281 return false;
4283 code = gimple_assign_rhs_code (stmt);
4285 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4286 || code == RROTATE_EXPR))
4287 return false;
4289 scalar_dest = gimple_assign_lhs (stmt);
4290 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4291 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4292 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4296 "bit-precision shifts not supported.\n");
4297 return false;
4300 op0 = gimple_assign_rhs1 (stmt);
4301 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4302 &def_stmt, &def, &dt[0], &vectype))
4304 if (dump_enabled_p ())
4305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4306 "use not simple.\n");
4307 return false;
4309 /* If op0 is an external or constant def, use a vector type with
4310 the same size as the output vector type. */
4311 if (!vectype)
4312 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4313 if (vec_stmt)
4314 gcc_assert (vectype);
4315 if (!vectype)
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4319 "no vectype for scalar type\n");
4320 return false;
4323 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4324 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4325 if (nunits_out != nunits_in)
4326 return false;
4328 op1 = gimple_assign_rhs2 (stmt);
4329 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4330 &def, &dt[1], &op1_vectype))
4332 if (dump_enabled_p ())
4333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4334 "use not simple.\n");
4335 return false;
4338 if (loop_vinfo)
4339 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4340 else
4341 vf = 1;
4343 /* Multiple types in SLP are handled by creating the appropriate number of
4344 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4345 case of SLP. */
4346 if (slp_node || PURE_SLP_STMT (stmt_info))
4347 ncopies = 1;
4348 else
4349 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4351 gcc_assert (ncopies >= 1);
4353 /* Determine whether the shift amount is a vector, or scalar. If the
4354 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4356 if (dt[1] == vect_internal_def && !slp_node)
4357 scalar_shift_arg = false;
4358 else if (dt[1] == vect_constant_def
4359 || dt[1] == vect_external_def
4360 || dt[1] == vect_internal_def)
4362 /* In SLP, we need to check whether the shift count is the same for
4363 all stmts; in loops, if it is a constant or invariant, it is always
4364 a scalar shift. */
4365 if (slp_node)
4367 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4368 gimple slpstmt;
4370 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4371 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4372 scalar_shift_arg = false;
4375 else
4377 if (dump_enabled_p ())
4378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4379 "operand mode requires invariant argument.\n");
4380 return false;
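/* For example (illustrative only): for  a[i] = b[i] << c  the shift amount
   C is loop-invariant, so the vector-shifted-by-scalar optab is preferred,
   whereas  a[i] = b[i] << c[i]  needs the vector-shifted-by-vector optab
   because every element is shifted by a different amount.  */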
4383 /* Vector shifted by vector. */
4384 if (!scalar_shift_arg)
4386 optab = optab_for_tree_code (code, vectype, optab_vector);
4387 if (dump_enabled_p ())
4388 dump_printf_loc (MSG_NOTE, vect_location,
4389 "vector/vector shift/rotate found.\n");
4391 if (!op1_vectype)
4392 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4393 if (op1_vectype == NULL_TREE
4394 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4396 if (dump_enabled_p ())
4397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4398 "unusable type for last operand in"
4399 " vector/vector shift/rotate.\n");
4400 return false;
4403 /* See if the machine has a vector shifted by scalar insn and if not
4404 then see if it has a vector shifted by vector insn. */
4405 else
4407 optab = optab_for_tree_code (code, vectype, optab_scalar);
4408 if (optab
4409 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4411 if (dump_enabled_p ())
4412 dump_printf_loc (MSG_NOTE, vect_location,
4413 "vector/scalar shift/rotate found.\n");
4415 else
4417 optab = optab_for_tree_code (code, vectype, optab_vector);
4418 if (optab
4419 && (optab_handler (optab, TYPE_MODE (vectype))
4420 != CODE_FOR_nothing))
4422 scalar_shift_arg = false;
4424 if (dump_enabled_p ())
4425 dump_printf_loc (MSG_NOTE, vect_location,
4426 "vector/vector shift/rotate found.\n");
4428 /* Unlike the other binary operators, shifts/rotates have
4429 an rhs that is an int rather than the same type as the lhs,
4430 so make sure the scalar is the right type if we are
4431 dealing with vectors of long long/long/short/char. */
4432 if (dt[1] == vect_constant_def)
4433 op1 = fold_convert (TREE_TYPE (vectype), op1);
4434 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4435 TREE_TYPE (op1)))
4437 if (slp_node
4438 && TYPE_MODE (TREE_TYPE (vectype))
4439 != TYPE_MODE (TREE_TYPE (op1)))
4441 if (dump_enabled_p ())
4442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4443 "unusable type for last operand in"
4444 " vector/vector shift/rotate.\n");
4445 return false;
4447 if (vec_stmt && !slp_node)
4449 op1 = fold_convert (TREE_TYPE (vectype), op1);
4450 op1 = vect_init_vector (stmt, op1,
4451 TREE_TYPE (vectype), NULL);
4458 /* Supportable by target? */
4459 if (!optab)
4461 if (dump_enabled_p ())
4462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4463 "no optab.\n");
4464 return false;
4466 vec_mode = TYPE_MODE (vectype);
4467 icode = (int) optab_handler (optab, vec_mode);
4468 if (icode == CODE_FOR_nothing)
4470 if (dump_enabled_p ())
4471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4472 "op not supported by target.\n");
4473 /* Check only during analysis. */
4474 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4475 || (vf < vect_min_worthwhile_factor (code)
4476 && !vec_stmt))
4477 return false;
4478 if (dump_enabled_p ())
4479 dump_printf_loc (MSG_NOTE, vect_location,
4480 "proceeding using word mode.\n");
4483 /* Worthwhile without SIMD support? Check only during analysis. */
4484 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4485 && vf < vect_min_worthwhile_factor (code)
4486 && !vec_stmt)
4488 if (dump_enabled_p ())
4489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4490 "not worthwhile without SIMD support.\n");
4491 return false;
4494 if (!vec_stmt) /* transformation not required. */
4496 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4497 if (dump_enabled_p ())
4498 dump_printf_loc (MSG_NOTE, vect_location,
4499 "=== vectorizable_shift ===\n");
4500 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4501 return true;
4504 /** Transform. **/
4506 if (dump_enabled_p ())
4507 dump_printf_loc (MSG_NOTE, vect_location,
4508 "transform binary/unary operation.\n");
4510 /* Handle def. */
4511 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4513 prev_stmt_info = NULL;
4514 for (j = 0; j < ncopies; j++)
4516 /* Handle uses. */
4517 if (j == 0)
4519 if (scalar_shift_arg)
4521 /* Vector shl and shr insn patterns can be defined with scalar
4522 operand 2 (shift operand). In this case, use constant or loop
4523 invariant op1 directly, without extending it to vector mode
4524 first. */
4525 optab_op2_mode = insn_data[icode].operand[2].mode;
4526 if (!VECTOR_MODE_P (optab_op2_mode))
4528 if (dump_enabled_p ())
4529 dump_printf_loc (MSG_NOTE, vect_location,
4530 "operand 1 using scalar mode.\n");
4531 vec_oprnd1 = op1;
4532 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4533 vec_oprnds1.quick_push (vec_oprnd1);
4534 if (slp_node)
4536 /* Store vec_oprnd1 for every vector stmt to be created
4537 for SLP_NODE. We check during the analysis that all
4538 the shift arguments are the same.
4539 TODO: Allow different constants for different vector
4540 stmts generated for an SLP instance. */
4541 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4542 vec_oprnds1.quick_push (vec_oprnd1);
4547 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4548 (a special case for certain kinds of vector shifts); otherwise,
4549 operand 1 should be of a vector type (the usual case). */
4550 if (vec_oprnd1)
4551 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4552 slp_node, -1);
4553 else
4554 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4555 slp_node, -1);
4557 else
4558 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4560 /* Arguments are ready. Create the new vector stmt. */
4561 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4563 vop1 = vec_oprnds1[i];
4564 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4565 new_temp = make_ssa_name (vec_dest, new_stmt);
4566 gimple_assign_set_lhs (new_stmt, new_temp);
4567 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4568 if (slp_node)
4569 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4572 if (slp_node)
4573 continue;
4575 if (j == 0)
4576 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4577 else
4578 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4579 prev_stmt_info = vinfo_for_stmt (new_stmt);
4582 vec_oprnds0.release ();
4583 vec_oprnds1.release ();
4585 return true;
4589 /* Function vectorizable_operation.
4591 Check if STMT performs a binary, unary or ternary operation that can
4592 be vectorized.
4593 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4594 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4595 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4597 static bool
4598 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4599 gimple *vec_stmt, slp_tree slp_node)
4601 tree vec_dest;
4602 tree scalar_dest;
4603 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4604 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4605 tree vectype;
4606 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4607 enum tree_code code;
4608 machine_mode vec_mode;
4609 tree new_temp;
4610 int op_type;
4611 optab optab;
4612 int icode;
4613 tree def;
4614 gimple def_stmt;
4615 enum vect_def_type dt[3]
4616 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4617 gimple new_stmt = NULL;
4618 stmt_vec_info prev_stmt_info;
4619 int nunits_in;
4620 int nunits_out;
4621 tree vectype_out;
4622 int ncopies;
4623 int j, i;
4624 vec<tree> vec_oprnds0 = vNULL;
4625 vec<tree> vec_oprnds1 = vNULL;
4626 vec<tree> vec_oprnds2 = vNULL;
4627 tree vop0, vop1, vop2;
4628 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4629 int vf;
4631 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4632 return false;
4634 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4635 return false;
4637 /* Is STMT a vectorizable binary/unary operation? */
4638 if (!is_gimple_assign (stmt))
4639 return false;
4641 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4642 return false;
4644 code = gimple_assign_rhs_code (stmt);
4646 /* For pointer addition, we should use the normal plus for
4647 the vector addition. */
4648 if (code == POINTER_PLUS_EXPR)
4649 code = PLUS_EXPR;
4651 /* Support only unary or binary operations. */
4652 op_type = TREE_CODE_LENGTH (code);
4653 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4655 if (dump_enabled_p ())
4656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4657 "num. args = %d (not unary/binary/ternary op).\n",
4658 op_type);
4659 return false;
4662 scalar_dest = gimple_assign_lhs (stmt);
4663 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4665 /* Most operations cannot handle bit-precision types without extra
4666 truncations. */
4667 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4668 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4669 /* Exceptions are bitwise binary operations. */
4670 && code != BIT_IOR_EXPR
4671 && code != BIT_XOR_EXPR
4672 && code != BIT_AND_EXPR)
4674 if (dump_enabled_p ())
4675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4676 "bit-precision arithmetic not supported.\n");
4677 return false;
4680 op0 = gimple_assign_rhs1 (stmt);
4681 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4682 &def_stmt, &def, &dt[0], &vectype))
4684 if (dump_enabled_p ())
4685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4686 "use not simple.\n");
4687 return false;
4689 /* If op0 is an external or constant def, use a vector type with
4690 the same size as the output vector type. */
4691 if (!vectype)
4692 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4693 if (vec_stmt)
4694 gcc_assert (vectype);
4695 if (!vectype)
4697 if (dump_enabled_p ())
4699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4700 "no vectype for scalar type ");
4701 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4702 TREE_TYPE (op0));
4703 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4706 return false;
4709 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4710 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4711 if (nunits_out != nunits_in)
4712 return false;
4714 if (op_type == binary_op || op_type == ternary_op)
4716 op1 = gimple_assign_rhs2 (stmt);
4717 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4718 &def, &dt[1]))
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4722 "use not simple.\n");
4723 return false;
4726 if (op_type == ternary_op)
4728 op2 = gimple_assign_rhs3 (stmt);
4729 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4730 &def, &dt[2]))
4732 if (dump_enabled_p ())
4733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4734 "use not simple.\n");
4735 return false;
4739 if (loop_vinfo)
4740 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4741 else
4742 vf = 1;
4744 /* Multiple types in SLP are handled by creating the appropriate number of
4745 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4746 case of SLP. */
4747 if (slp_node || PURE_SLP_STMT (stmt_info))
4748 ncopies = 1;
4749 else
4750 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4752 gcc_assert (ncopies >= 1);
4754 /* Shifts are handled in vectorizable_shift (). */
4755 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4756 || code == RROTATE_EXPR)
4757 return false;
4759 /* Supportable by target? */
4761 vec_mode = TYPE_MODE (vectype);
4762 if (code == MULT_HIGHPART_EXPR)
4764 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4765 icode = LAST_INSN_CODE;
4766 else
4767 icode = CODE_FOR_nothing;
4769 else
4771 optab = optab_for_tree_code (code, vectype, optab_default);
4772 if (!optab)
4774 if (dump_enabled_p ())
4775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4776 "no optab.\n");
4777 return false;
4779 icode = (int) optab_handler (optab, vec_mode);
4782 if (icode == CODE_FOR_nothing)
4784 if (dump_enabled_p ())
4785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4786 "op not supported by target.\n");
4787 /* Check only during analysis. */
4788 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4789 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4790 return false;
4791 if (dump_enabled_p ())
4792 dump_printf_loc (MSG_NOTE, vect_location,
4793 "proceeding using word mode.\n");
4796 /* Worthwhile without SIMD support? Check only during analysis. */
4797 if (!VECTOR_MODE_P (vec_mode)
4798 && !vec_stmt
4799 && vf < vect_min_worthwhile_factor (code))
4801 if (dump_enabled_p ())
4802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4803 "not worthwhile without SIMD support.\n");
4804 return false;
4807 if (!vec_stmt) /* transformation not required. */
4809 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4810 if (dump_enabled_p ())
4811 dump_printf_loc (MSG_NOTE, vect_location,
4812 "=== vectorizable_operation ===\n");
4813 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4814 return true;
4817 /** Transform. **/
4819 if (dump_enabled_p ())
4820 dump_printf_loc (MSG_NOTE, vect_location,
4821 "transform binary/unary operation.\n");
4823 /* Handle def. */
4824 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4826 /* In case the vectorization factor (VF) is bigger than the number
4827 of elements that we can fit in a vectype (nunits), we have to generate
4828 more than one vector stmt - i.e - we need to "unroll" the
4829 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4830 from one copy of the vector stmt to the next, in the field
4831 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4832 stages to find the correct vector defs to be used when vectorizing
4833 stmts that use the defs of the current stmt. The example below
4834 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4835 we need to create 4 vectorized stmts):
4837 before vectorization:
4838 RELATED_STMT VEC_STMT
4839 S1: x = memref - -
4840 S2: z = x + 1 - -
4842 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4843 there):
4844 RELATED_STMT VEC_STMT
4845 VS1_0: vx0 = memref0 VS1_1 -
4846 VS1_1: vx1 = memref1 VS1_2 -
4847 VS1_2: vx2 = memref2 VS1_3 -
4848 VS1_3: vx3 = memref3 - -
4849 S1: x = load - VS1_0
4850 S2: z = x + 1 - -
4852 step2: vectorize stmt S2 (done here):
4853 To vectorize stmt S2 we first need to find the relevant vector
4854 def for the first operand 'x'. This is, as usual, obtained from
4855 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4856 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4857 relevant vector def 'vx0'. Having found 'vx0' we can generate
4858 the vector stmt VS2_0, and as usual, record it in the
4859 STMT_VINFO_VEC_STMT of stmt S2.
4860 When creating the second copy (VS2_1), we obtain the relevant vector
4861 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4862 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4863 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4864 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4865 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4866 chain of stmts and pointers:
4867 RELATED_STMT VEC_STMT
4868 VS1_0: vx0 = memref0 VS1_1 -
4869 VS1_1: vx1 = memref1 VS1_2 -
4870 VS1_2: vx2 = memref2 VS1_3 -
4871 VS1_3: vx3 = memref3 - -
4872 S1: x = load - VS1_0
4873 VS2_0: vz0 = vx0 + v1 VS2_1 -
4874 VS2_1: vz1 = vx1 + v1 VS2_2 -
4875 VS2_2: vz2 = vx2 + v1 VS2_3 -
4876 VS2_3: vz3 = vx3 + v1 - -
4877 S2: z = x + 1 - VS2_0 */
4879 prev_stmt_info = NULL;
4880 for (j = 0; j < ncopies; j++)
4882 /* Handle uses. */
4883 if (j == 0)
4885 if (op_type == binary_op || op_type == ternary_op)
4886 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4887 slp_node, -1);
4888 else
4889 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4890 slp_node, -1);
4891 if (op_type == ternary_op)
4893 vec_oprnds2.create (1);
4894 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4895 stmt,
4896 NULL));
4899 else
4901 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4902 if (op_type == ternary_op)
4904 tree vec_oprnd = vec_oprnds2.pop ();
4905 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4906 vec_oprnd));
4910 /* Arguments are ready. Create the new vector stmt. */
4911 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4913 vop1 = ((op_type == binary_op || op_type == ternary_op)
4914 ? vec_oprnds1[i] : NULL_TREE);
4915 vop2 = ((op_type == ternary_op)
4916 ? vec_oprnds2[i] : NULL_TREE);
4917 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4918 new_temp = make_ssa_name (vec_dest, new_stmt);
4919 gimple_assign_set_lhs (new_stmt, new_temp);
4920 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4921 if (slp_node)
4922 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4925 if (slp_node)
4926 continue;
4928 if (j == 0)
4929 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4930 else
4931 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4932 prev_stmt_info = vinfo_for_stmt (new_stmt);
4935 vec_oprnds0.release ();
4936 vec_oprnds1.release ();
4937 vec_oprnds2.release ();
4939 return true;
4942 /* A helper function to ensure data reference DR's base alignment
4943 for STMT_INFO. */
4945 static void
4946 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4948 if (!dr->aux)
4949 return;
4951 if (((dataref_aux *)dr->aux)->base_misaligned)
4953 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4954 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4956 if (decl_in_symtab_p (base_decl))
4957 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4958 else
4960 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4961 DECL_USER_ALIGN (base_decl) = 1;
4963 ((dataref_aux *)dr->aux)->base_misaligned = false;
4968 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4969 reversal of the vector elements. If that is impossible to do,
4970 returns NULL. */
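/* For a four-element vector, for instance, the selector built below is
   { 3, 2, 1, 0 }: element 3 of the input becomes element 0 of the result,
   and so on (illustrative example).  */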
4972 static tree
4973 perm_mask_for_reverse (tree vectype)
4975 int i, nunits;
4976 unsigned char *sel;
4978 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4979 sel = XALLOCAVEC (unsigned char, nunits);
4981 for (i = 0; i < nunits; ++i)
4982 sel[i] = nunits - 1 - i;
4984 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4985 return NULL_TREE;
4986 return vect_gen_perm_mask_checked (vectype, sel);
4989 /* Function vectorizable_store.
4991 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4992 can be vectorized.
4993 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4994 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4995 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4997 static bool
4998 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4999 slp_tree slp_node)
5001 tree scalar_dest;
5002 tree data_ref;
5003 tree op;
5004 tree vec_oprnd = NULL_TREE;
5005 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5006 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5007 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5008 tree elem_type;
5009 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5010 struct loop *loop = NULL;
5011 machine_mode vec_mode;
5012 tree dummy;
5013 enum dr_alignment_support alignment_support_scheme;
5014 tree def;
5015 gimple def_stmt;
5016 enum vect_def_type dt;
5017 stmt_vec_info prev_stmt_info = NULL;
5018 tree dataref_ptr = NULL_TREE;
5019 tree dataref_offset = NULL_TREE;
5020 gimple ptr_incr = NULL;
5021 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5022 int ncopies;
5023 int j;
5024 gimple next_stmt, first_stmt = NULL;
5025 bool grouped_store = false;
5026 bool store_lanes_p = false;
5027 unsigned int group_size, i;
5028 vec<tree> dr_chain = vNULL;
5029 vec<tree> oprnds = vNULL;
5030 vec<tree> result_chain = vNULL;
5031 bool inv_p;
5032 bool negative = false;
5033 tree offset = NULL_TREE;
5034 vec<tree> vec_oprnds = vNULL;
5035 bool slp = (slp_node != NULL);
5036 unsigned int vec_num;
5037 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5038 tree aggr_type;
5040 if (loop_vinfo)
5041 loop = LOOP_VINFO_LOOP (loop_vinfo);
5043 /* Multiple types in SLP are handled by creating the appropriate number of
5044 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5045 case of SLP. */
5046 if (slp || PURE_SLP_STMT (stmt_info))
5047 ncopies = 1;
5048 else
5049 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5051 gcc_assert (ncopies >= 1);
5053 /* FORNOW. This restriction should be relaxed. */
5054 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5056 if (dump_enabled_p ())
5057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5058 "multiple types in nested loop.\n");
5059 return false;
5062 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5063 return false;
5065 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5066 return false;
5068 /* Is vectorizable store? */
5070 if (!is_gimple_assign (stmt))
5071 return false;
5073 scalar_dest = gimple_assign_lhs (stmt);
5074 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5075 && is_pattern_stmt_p (stmt_info))
5076 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5077 if (TREE_CODE (scalar_dest) != ARRAY_REF
5078 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5079 && TREE_CODE (scalar_dest) != INDIRECT_REF
5080 && TREE_CODE (scalar_dest) != COMPONENT_REF
5081 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5082 && TREE_CODE (scalar_dest) != REALPART_EXPR
5083 && TREE_CODE (scalar_dest) != MEM_REF)
5084 return false;
5086 gcc_assert (gimple_assign_single_p (stmt));
5087 op = gimple_assign_rhs1 (stmt);
5088 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5089 &def, &dt))
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5093 "use not simple.\n");
5094 return false;
5097 elem_type = TREE_TYPE (vectype);
5098 vec_mode = TYPE_MODE (vectype);
5100 /* FORNOW. In some cases can vectorize even if data-type not supported
5101 (e.g. - array initialization with 0). */
5102 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5103 return false;
5105 if (!STMT_VINFO_DATA_REF (stmt_info))
5106 return false;
5108 if (!STMT_VINFO_STRIDED_P (stmt_info))
5110 negative =
5111 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5112 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5113 size_zero_node) < 0;
5114 if (negative && ncopies > 1)
5116 if (dump_enabled_p ())
5117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5118 "multiple types with negative step.\n");
5119 return false;
5121 if (negative)
5123 gcc_assert (!grouped_store);
5124 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5125 if (alignment_support_scheme != dr_aligned
5126 && alignment_support_scheme != dr_unaligned_supported)
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5130 "negative step but alignment required.\n");
5131 return false;
5133 if (dt != vect_constant_def
5134 && dt != vect_external_def
5135 && !perm_mask_for_reverse (vectype))
5137 if (dump_enabled_p ())
5138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5139 "negative step and reversing not supported.\n");
5140 return false;
5145 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5147 grouped_store = true;
5148 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5149 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5150 if (!slp
5151 && !PURE_SLP_STMT (stmt_info)
5152 && !STMT_VINFO_STRIDED_P (stmt_info))
5154 if (vect_store_lanes_supported (vectype, group_size))
5155 store_lanes_p = true;
5156 else if (!vect_grouped_store_supported (vectype, group_size))
5157 return false;
5160 if (STMT_VINFO_STRIDED_P (stmt_info)
5161 && (slp || PURE_SLP_STMT (stmt_info))
5162 && (group_size > nunits
5163 || nunits % group_size != 0))
5165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5166 "unhandled strided group store\n");
5167 return false;
5170 if (first_stmt == stmt)
5172 /* STMT is the leader of the group. Check the operands of all the
5173 stmts of the group. */
5174 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5175 while (next_stmt)
5177 gcc_assert (gimple_assign_single_p (next_stmt));
5178 op = gimple_assign_rhs1 (next_stmt);
5179 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5180 &def_stmt, &def, &dt))
5182 if (dump_enabled_p ())
5183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5184 "use not simple.\n");
5185 return false;
5187 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5192 if (!vec_stmt) /* transformation not required. */
5194 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5195 /* The SLP costs are calculated during SLP analysis. */
5196 if (!PURE_SLP_STMT (stmt_info))
5197 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5198 NULL, NULL, NULL);
5199 return true;
5202 /** Transform. **/
5204 ensure_base_align (stmt_info, dr);
5206 if (grouped_store)
5208 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5209 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5211 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5213 /* FORNOW */
5214 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5216 /* We vectorize all the stmts of the interleaving group when we
5217 reach the last stmt in the group. */
5218 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5219 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5220 && !slp)
5222 *vec_stmt = NULL;
5223 return true;
5226 if (slp)
5228 grouped_store = false;
5229 /* VEC_NUM is the number of vect stmts to be created for this
5230 group. */
5231 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5232 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5233 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5234 op = gimple_assign_rhs1 (first_stmt);
5236 else
5237 /* VEC_NUM is the number of vect stmts to be created for this
5238 group. */
5239 vec_num = group_size;
5241 else
5243 first_stmt = stmt;
5244 first_dr = dr;
5245 group_size = vec_num = 1;
5248 if (dump_enabled_p ())
5249 dump_printf_loc (MSG_NOTE, vect_location,
5250 "transform store. ncopies = %d\n", ncopies);
5252 if (STMT_VINFO_STRIDED_P (stmt_info))
5254 gimple_stmt_iterator incr_gsi;
5255 bool insert_after;
5256 gimple incr;
5257 tree offvar;
5258 tree ivstep;
5259 tree running_off;
5260 gimple_seq stmts = NULL;
5261 tree stride_base, stride_step, alias_off;
5262 tree vec_oprnd;
5264 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5266 stride_base
5267 = fold_build_pointer_plus
5268 (unshare_expr (DR_BASE_ADDRESS (dr)),
5269 size_binop (PLUS_EXPR,
5270 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5271 convert_to_ptrofftype (DR_INIT (dr))));
5272 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5274 /* For a store with loop-invariant (but other than power-of-2)
5275 stride (i.e. not a grouped access) like so:
5277 for (i = 0; i < n; i += stride)
5278 array[i] = ...;
5280 we generate a new induction variable and new stores from
5281 the components of the (vectorized) rhs:
5283 for (j = 0; ; j += VF*stride)
5284 vectemp = ...;
5285 tmp1 = vectemp[0];
5286 array[j] = tmp1;
5287 tmp2 = vectemp[1];
5288 array[j + stride] = tmp2;
5292 unsigned nstores = nunits;
5293 tree ltype = elem_type;
5294 if (slp)
5296 nstores = nunits / group_size;
5297 if (group_size < nunits)
5298 ltype = build_vector_type (elem_type, group_size);
5299 else
5300 ltype = vectype;
5301 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5302 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5305 ivstep = stride_step;
5306 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5307 build_int_cst (TREE_TYPE (ivstep),
5308 ncopies * nstores));
5310 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5312 create_iv (stride_base, ivstep, NULL,
5313 loop, &incr_gsi, insert_after,
5314 &offvar, NULL);
5315 incr = gsi_stmt (incr_gsi);
5316 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5318 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5319 if (stmts)
5320 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5322 prev_stmt_info = NULL;
5323 running_off = offvar;
5324 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
5325 for (j = 0; j < ncopies; j++)
5327 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5328 and first_stmt == stmt. */
5329 if (j == 0)
5330 vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
5331 else
5332 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5334 for (i = 0; i < nstores; i++)
5336 tree newref, newoff;
5337 gimple incr, assign;
5338 tree size = TYPE_SIZE (ltype);
5339 /* Extract the i'th component. */
5340 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
5341 size);
5342 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5343 size, pos);
5345 elem = force_gimple_operand_gsi (gsi, elem, true,
5346 NULL_TREE, true,
5347 GSI_SAME_STMT);
5349 newref = build2 (MEM_REF, ltype,
5350 running_off, alias_off);
5352 /* And store it to *running_off. */
5353 assign = gimple_build_assign (newref, elem);
5354 vect_finish_stmt_generation (stmt, assign, gsi);
5356 newoff = copy_ssa_name (running_off, NULL);
5357 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5358 running_off, stride_step);
5359 vect_finish_stmt_generation (stmt, incr, gsi);
5361 running_off = newoff;
5362 if (j == 0 && i == 0)
5363 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
5364 else
5365 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5366 prev_stmt_info = vinfo_for_stmt (assign);
5369 return true;
5372 dr_chain.create (group_size);
5373 oprnds.create (group_size);
5375 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5376 gcc_assert (alignment_support_scheme);
5377 /* Targets with store-lane instructions must not require explicit
5378 realignment. */
5379 gcc_assert (!store_lanes_p
5380 || alignment_support_scheme == dr_aligned
5381 || alignment_support_scheme == dr_unaligned_supported);
5383 if (negative)
5384 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5386 if (store_lanes_p)
5387 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5388 else
5389 aggr_type = vectype;
5391 /* In case the vectorization factor (VF) is bigger than the number
5392 of elements that we can fit in a vectype (nunits), we have to generate
5393 more than one vector stmt - i.e - we need to "unroll" the
5394 vector stmt by a factor VF/nunits. For more details see documentation in
5395 vect_get_vec_def_for_copy_stmt. */
5397 /* In case of interleaving (non-unit grouped access):
5399 S1: &base + 2 = x2
5400 S2: &base = x0
5401 S3: &base + 1 = x1
5402 S4: &base + 3 = x3
5404 We create vectorized stores starting from base address (the access of the
5405 first stmt in the chain (S2 in the above example), when the last store stmt
5406 of the chain (S4) is reached:
5408 VS1: &base = vx2
5409 VS2: &base + vec_size*1 = vx0
5410 VS3: &base + vec_size*2 = vx1
5411 VS4: &base + vec_size*3 = vx3
5413 Then permutation statements are generated:
5415 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5416 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5419 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5420 (the order of the data-refs in the output of vect_permute_store_chain
5421 corresponds to the order of scalar stmts in the interleaving chain - see
5422 the documentation of vect_permute_store_chain()).
5424 In case of both multiple types and interleaving, above vector stores and
5425 permutation stmts are created for every copy. The result vector stmts are
5426 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5427 STMT_VINFO_RELATED_STMT for the next copies.
5430 prev_stmt_info = NULL;
5431 for (j = 0; j < ncopies; j++)
5433 gimple new_stmt;
5435 if (j == 0)
5437 if (slp)
5439 /* Get vectorized arguments for SLP_NODE. */
5440 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5441 NULL, slp_node, -1);
5443 vec_oprnd = vec_oprnds[0];
5445 else
5447 /* For interleaved stores we collect vectorized defs for all the
5448 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5449 used as an input to vect_permute_store_chain(), and OPRNDS as
5450 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5452 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5453 OPRNDS are of size 1. */
5454 next_stmt = first_stmt;
5455 for (i = 0; i < group_size; i++)
5457 /* Since gaps are not supported for interleaved stores,
5458 GROUP_SIZE is the exact number of stmts in the chain.
5459 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5460 there is no interleaving, GROUP_SIZE is 1, and only one
5461 iteration of the loop will be executed. */
5462 gcc_assert (next_stmt
5463 && gimple_assign_single_p (next_stmt));
5464 op = gimple_assign_rhs1 (next_stmt);
5466 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5467 NULL);
5468 dr_chain.quick_push (vec_oprnd);
5469 oprnds.quick_push (vec_oprnd);
5470 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5474 /* We should have caught mismatched types earlier. */
5475 gcc_assert (useless_type_conversion_p (vectype,
5476 TREE_TYPE (vec_oprnd)));
5477 bool simd_lane_access_p
5478 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5479 if (simd_lane_access_p
5480 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5481 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5482 && integer_zerop (DR_OFFSET (first_dr))
5483 && integer_zerop (DR_INIT (first_dr))
5484 && alias_sets_conflict_p (get_alias_set (aggr_type),
5485 get_alias_set (DR_REF (first_dr))))
5487 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5488 dataref_offset = build_int_cst (reference_alias_ptr_type
5489 (DR_REF (first_dr)), 0);
5490 inv_p = false;
5492 else
5493 dataref_ptr
5494 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5495 simd_lane_access_p ? loop : NULL,
5496 offset, &dummy, gsi, &ptr_incr,
5497 simd_lane_access_p, &inv_p);
5498 gcc_assert (bb_vinfo || !inv_p);
5500 else
5502 /* For interleaved stores we created vectorized defs for all the
5503 defs stored in OPRNDS in the previous iteration (previous copy).
5504 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5505 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5506 next copy.
5507 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5508 OPRNDS are of size 1. */
5509 for (i = 0; i < group_size; i++)
5511 op = oprnds[i];
5512 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5513 &def, &dt);
5514 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5515 dr_chain[i] = vec_oprnd;
5516 oprnds[i] = vec_oprnd;
5518 if (dataref_offset)
5519 dataref_offset
5520 = int_const_binop (PLUS_EXPR, dataref_offset,
5521 TYPE_SIZE_UNIT (aggr_type));
5522 else
5523 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5524 TYPE_SIZE_UNIT (aggr_type));
5527 if (store_lanes_p)
5529 tree vec_array;
5531 /* Combine all the vectors into an array. */
5532 vec_array = create_vector_array (vectype, vec_num);
5533 for (i = 0; i < vec_num; i++)
5535 vec_oprnd = dr_chain[i];
5536 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5539 /* Emit:
5540 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5541 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5542 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5543 gimple_call_set_lhs (new_stmt, data_ref);
5544 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5546 else
5548 new_stmt = NULL;
5549 if (grouped_store)
5551 if (j == 0)
5552 result_chain.create (group_size);
5553 /* Permute. */
5554 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5555 &result_chain);
5558 next_stmt = first_stmt;
5559 for (i = 0; i < vec_num; i++)
5561 unsigned align, misalign;
5563 if (i > 0)
5564 /* Bump the vector pointer. */
5565 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5566 stmt, NULL_TREE);
5568 if (slp)
5569 vec_oprnd = vec_oprnds[i];
5570 else if (grouped_store)
5571 /* For grouped stores vectorized defs are interleaved in
5572 vect_permute_store_chain(). */
5573 vec_oprnd = result_chain[i];
5575 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5576 dataref_offset
5577 ? dataref_offset
5578 : build_int_cst (reference_alias_ptr_type
5579 (DR_REF (first_dr)), 0));
5580 align = TYPE_ALIGN_UNIT (vectype);
5581 if (aligned_access_p (first_dr))
5582 misalign = 0;
5583 else if (DR_MISALIGNMENT (first_dr) == -1)
5585 TREE_TYPE (data_ref)
5586 = build_aligned_type (TREE_TYPE (data_ref),
5587 TYPE_ALIGN (elem_type));
5588 align = TYPE_ALIGN_UNIT (elem_type);
5589 misalign = 0;
5591 else
5593 TREE_TYPE (data_ref)
5594 = build_aligned_type (TREE_TYPE (data_ref),
5595 TYPE_ALIGN (elem_type));
5596 misalign = DR_MISALIGNMENT (first_dr);
5598 if (dataref_offset == NULL_TREE)
5599 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5600 misalign);
5602 if (negative
5603 && dt != vect_constant_def
5604 && dt != vect_external_def)
5606 tree perm_mask = perm_mask_for_reverse (vectype);
5607 tree perm_dest
5608 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5609 vectype);
5610 tree new_temp = make_ssa_name (perm_dest);
5612 /* Generate the permute statement. */
5613 gimple perm_stmt
5614 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5615 vec_oprnd, perm_mask);
5616 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5618 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5619 vec_oprnd = new_temp;
5622 /* Arguments are ready. Create the new vector stmt. */
5623 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5624 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5626 if (slp)
5627 continue;
5629 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5630 if (!next_stmt)
5631 break;
5634 if (!slp)
5636 if (j == 0)
5637 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5638 else
5639 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5640 prev_stmt_info = vinfo_for_stmt (new_stmt);
5644 dr_chain.release ();
5645 oprnds.release ();
5646 result_chain.release ();
5647 vec_oprnds.release ();
5649 return true;
5652 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5653 VECTOR_CST mask. No checks are made that the target platform supports the
5654 mask, so callers may wish to test can_vec_perm_p separately, or use
5655 vect_gen_perm_mask_checked. */
5657 tree
5658 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5660 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5661 int i, nunits;
5663 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5665 mask_elt_type = lang_hooks.types.type_for_mode
5666 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5667 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5669 mask_elts = XALLOCAVEC (tree, nunits);
5670 for (i = nunits - 1; i >= 0; i--)
5671 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5672 mask_vec = build_vector (mask_type, mask_elts);
5674 return mask_vec;
5677 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5678 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5680 tree
5681 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5683 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5684 return vect_gen_perm_mask_any (vectype, sel);
5687 /* Given vector variables X and Y that were generated for the scalar
5688 STMT, generate instructions to permute the vector elements of X and Y
5689 using permutation mask MASK_VEC, insert them at *GSI and return the
5690 permuted vector variable. */
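/* Illustrative sketch (hypothetical operand names): for V4SI inputs
   X = {x0, x1, x2, x3}, Y = {y0, y1, y2, y3} and MASK_VEC = {0, 4, 1, 5},
   the emitted statement is roughly

     perm_dest_1 = VEC_PERM_EXPR <X, Y, { 0, 4, 1, 5 }>;

   yielding {x0, y0, x1, y1}: element I of the result is element
   MASK_VEC[I] of the concatenation of X and Y.  */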
5692 static tree
5693 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5694 gimple_stmt_iterator *gsi)
5696 tree vectype = TREE_TYPE (x);
5697 tree perm_dest, data_ref;
5698 gimple perm_stmt;
5700 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5701 data_ref = make_ssa_name (perm_dest);
5703 /* Generate the permute statement. */
5704 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5705 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5707 return data_ref;
5710 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5711 inserting them on the loop's preheader edge. Returns true if we
5712 were successful in doing so (and thus STMT can then be moved),
5713 otherwise returns false. */
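/* Illustrative example (assumed SSA names, not from the sources): if
   STMT is  _7 = *p_5  and  p_5 = &a + _3  is defined inside LOOP while
   _3 is defined outside of it, the definition of p_5 is moved onto the
   preheader edge so that STMT itself becomes movable.  A PHI def, or a
   def whose own operands would also need hoisting, makes the function
   give up and return false.  */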
5715 static bool
5716 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5718 ssa_op_iter i;
5719 tree op;
5720 bool any = false;
5722 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5724 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5725 if (!gimple_nop_p (def_stmt)
5726 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5728 /* Make sure we don't need to recurse. While we could do
5729 so in simple cases, when there are more complex use webs
5730 we don't have an easy way to preserve stmt order to fulfil
5731 dependencies within them. */
5732 tree op2;
5733 ssa_op_iter i2;
5734 if (gimple_code (def_stmt) == GIMPLE_PHI)
5735 return false;
5736 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5738 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5739 if (!gimple_nop_p (def_stmt2)
5740 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5741 return false;
5743 any = true;
5747 if (!any)
5748 return true;
5750 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5752 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5753 if (!gimple_nop_p (def_stmt)
5754 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5756 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5757 gsi_remove (&gsi, false);
5758 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5762 return true;
5765 /* vectorizable_load.
5767 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5768 can be vectorized.
5769 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5770 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5771 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
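/* Illustrative sketch (assumed GIMPLE names, not taken from a real dump):
   for a relevant scalar load  x_1 = a[i_2]  in a loop vectorized with
   VF = 4, the transformation emits roughly

     vect_x_1.5 = MEM[(int *)vectp_a.4];

   one vector load standing for four scalar iterations, with vectp_a.4
   created and bumped via vect_create_data_ref_ptr / bump_vector_ptr.  */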
5773 static bool
5774 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5775 slp_tree slp_node, slp_instance slp_node_instance)
5777 tree scalar_dest;
5778 tree vec_dest = NULL;
5779 tree data_ref = NULL;
5780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5781 stmt_vec_info prev_stmt_info;
5782 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5783 struct loop *loop = NULL;
5784 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5785 bool nested_in_vect_loop = false;
5786 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5787 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5788 tree elem_type;
5789 tree new_temp;
5790 machine_mode mode;
5791 gimple new_stmt = NULL;
5792 tree dummy;
5793 enum dr_alignment_support alignment_support_scheme;
5794 tree dataref_ptr = NULL_TREE;
5795 tree dataref_offset = NULL_TREE;
5796 gimple ptr_incr = NULL;
5797 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5798 int ncopies;
5799 int i, j, group_size = -1, group_gap_adj;
5800 tree msq = NULL_TREE, lsq;
5801 tree offset = NULL_TREE;
5802 tree byte_offset = NULL_TREE;
5803 tree realignment_token = NULL_TREE;
5804 gphi *phi = NULL;
5805 vec<tree> dr_chain = vNULL;
5806 bool grouped_load = false;
5807 bool load_lanes_p = false;
5808 gimple first_stmt;
5809 bool inv_p;
5810 bool negative = false;
5811 bool compute_in_loop = false;
5812 struct loop *at_loop;
5813 int vec_num;
5814 bool slp = (slp_node != NULL);
5815 bool slp_perm = false;
5816 enum tree_code code;
5817 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5818 int vf;
5819 tree aggr_type;
5820 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5821 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5822 int gather_scale = 1;
5823 enum vect_def_type gather_dt = vect_unknown_def_type;
5825 if (loop_vinfo)
5827 loop = LOOP_VINFO_LOOP (loop_vinfo);
5828 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5829 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5831 else
5832 vf = 1;
5834 /* Multiple types in SLP are handled by creating the appropriate number of
5835 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5836 case of SLP. */
5837 if (slp || PURE_SLP_STMT (stmt_info))
5838 ncopies = 1;
5839 else
5840 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5842 gcc_assert (ncopies >= 1);
5844 /* FORNOW. This restriction should be relaxed. */
5845 if (nested_in_vect_loop && ncopies > 1)
5847 if (dump_enabled_p ())
5848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5849 "multiple types in nested loop.\n");
5850 return false;
5853 /* Invalidate assumptions made by dependence analysis when vectorization
5854 on the unrolled body effectively re-orders stmts. */
5855 if (ncopies > 1
5856 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5857 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5858 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5860 if (dump_enabled_p ())
5861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5862 "cannot perform implicit CSE when unrolling "
5863 "with negative dependence distance\n");
5864 return false;
5867 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5868 return false;
5870 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5871 return false;
5873 /* Is vectorizable load? */
5874 if (!is_gimple_assign (stmt))
5875 return false;
5877 scalar_dest = gimple_assign_lhs (stmt);
5878 if (TREE_CODE (scalar_dest) != SSA_NAME)
5879 return false;
5881 code = gimple_assign_rhs_code (stmt);
5882 if (code != ARRAY_REF
5883 && code != BIT_FIELD_REF
5884 && code != INDIRECT_REF
5885 && code != COMPONENT_REF
5886 && code != IMAGPART_EXPR
5887 && code != REALPART_EXPR
5888 && code != MEM_REF
5889 && TREE_CODE_CLASS (code) != tcc_declaration)
5890 return false;
5892 if (!STMT_VINFO_DATA_REF (stmt_info))
5893 return false;
5895 elem_type = TREE_TYPE (vectype);
5896 mode = TYPE_MODE (vectype);
5898 /* FORNOW. In some cases we can vectorize even if the data-type is not
5899 supported (e.g. data copies). */
5900 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5902 if (dump_enabled_p ())
5903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5904 "Aligned load, but unsupported type.\n");
5905 return false;
5908 /* Check if the load is a part of an interleaving chain. */
5909 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5911 grouped_load = true;
5912 /* FORNOW */
5913 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5915 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5917 /* If this is single-element interleaving with an element distance
5918 that leaves unused vector loads around, punt - we at least create
5919 very sub-optimal code in that case (and blow up memory,
5920 see PR65518). */
5921 if (first_stmt == stmt
5922 && !GROUP_NEXT_ELEMENT (stmt_info)
5923 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5925 if (dump_enabled_p ())
5926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5927 "single-element interleaving not supported "
5928 "for non-adjacent vector loads\n");
5929 return false;
5932 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
5933 slp_perm = true;
5935 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5936 if (!slp
5937 && !PURE_SLP_STMT (stmt_info)
5938 && !STMT_VINFO_STRIDED_P (stmt_info))
5940 if (vect_load_lanes_supported (vectype, group_size))
5941 load_lanes_p = true;
5942 else if (!vect_grouped_load_supported (vectype, group_size))
5943 return false;
5946 /* Invalidate assumptions made by dependence analysis when vectorization
5947 on the unrolled body effectively re-orders stmts. */
5948 if (!PURE_SLP_STMT (stmt_info)
5949 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5950 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5951 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5953 if (dump_enabled_p ())
5954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5955 "cannot perform implicit CSE when performing "
5956 "group loads with negative dependence distance\n");
5957 return false;
5960 /* Similarly, when the stmt is a load that is both part of an SLP
5961 instance and a loop-vectorized stmt via the same-dr mechanism,
5962 we have to give up. */
5963 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5964 && (STMT_SLP_TYPE (stmt_info)
5965 != STMT_SLP_TYPE (vinfo_for_stmt
5966 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970 "conflicting SLP types for CSEd load\n");
5971 return false;
5976 if (STMT_VINFO_GATHER_P (stmt_info))
5978 gimple def_stmt;
5979 tree def;
5980 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5981 &gather_off, &gather_scale);
5982 gcc_assert (gather_decl);
5983 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5984 &def_stmt, &def, &gather_dt,
5985 &gather_off_vectype))
5987 if (dump_enabled_p ())
5988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5989 "gather index use not simple.\n");
5990 return false;
5993 else if (STMT_VINFO_STRIDED_P (stmt_info))
5995 if ((grouped_load
5996 && (slp || PURE_SLP_STMT (stmt_info)))
5997 && (group_size > nunits
5998 || nunits % group_size != 0))
6000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6001 "unhandled strided group load\n");
6002 return false;
6005 else
6007 negative = tree_int_cst_compare (nested_in_vect_loop
6008 ? STMT_VINFO_DR_STEP (stmt_info)
6009 : DR_STEP (dr),
6010 size_zero_node) < 0;
6011 if (negative && ncopies > 1)
6013 if (dump_enabled_p ())
6014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6015 "multiple types with negative step.\n");
6016 return false;
6019 if (negative)
6021 if (grouped_load)
6023 if (dump_enabled_p ())
6024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6025 "negative step for group load not supported"
6026 "\n");
6027 return false;
6029 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6030 if (alignment_support_scheme != dr_aligned
6031 && alignment_support_scheme != dr_unaligned_supported)
6033 if (dump_enabled_p ())
6034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6035 "negative step but alignment required.\n");
6036 return false;
6038 if (!perm_mask_for_reverse (vectype))
6040 if (dump_enabled_p ())
6041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6042 "negative step and reversing not supported."
6043 "\n");
6044 return false;
6049 if (!vec_stmt) /* transformation not required. */
6051 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6052 /* The SLP costs are calculated during SLP analysis. */
6053 if (!PURE_SLP_STMT (stmt_info))
6054 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6055 NULL, NULL, NULL);
6056 return true;
6059 if (dump_enabled_p ())
6060 dump_printf_loc (MSG_NOTE, vect_location,
6061 "transform load. ncopies = %d\n", ncopies);
6063 /** Transform. **/
6065 ensure_base_align (stmt_info, dr);
6067 if (STMT_VINFO_GATHER_P (stmt_info))
6069 tree vec_oprnd0 = NULL_TREE, op;
6070 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6071 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6072 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6073 edge pe = loop_preheader_edge (loop);
6074 gimple_seq seq;
6075 basic_block new_bb;
6076 enum { NARROW, NONE, WIDEN } modifier;
6077 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6079 if (nunits == gather_off_nunits)
6080 modifier = NONE;
6081 else if (nunits == gather_off_nunits / 2)
6083 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6084 modifier = WIDEN;
6086 for (i = 0; i < gather_off_nunits; ++i)
6087 sel[i] = i | nunits;
6089 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6091 else if (nunits == gather_off_nunits * 2)
6093 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6094 modifier = NARROW;
6096 for (i = 0; i < nunits; ++i)
6097 sel[i] = i < gather_off_nunits
6098 ? i : i + nunits - gather_off_nunits;
6100 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6101 ncopies *= 2;
6103 else
6104 gcc_unreachable ();
6106 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6107 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6108 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6109 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6110 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6111 scaletype = TREE_VALUE (arglist);
6112 gcc_checking_assert (types_compatible_p (srctype, rettype));
6114 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6116 ptr = fold_convert (ptrtype, gather_base);
6117 if (!is_gimple_min_invariant (ptr))
6119 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6120 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6121 gcc_assert (!new_bb);
6124 /* Currently we support only unconditional gather loads,
6125 so mask should be all ones. */
6126 if (TREE_CODE (masktype) == INTEGER_TYPE)
6127 mask = build_int_cst (masktype, -1);
6128 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6130 mask = build_int_cst (TREE_TYPE (masktype), -1);
6131 mask = build_vector_from_val (masktype, mask);
6132 mask = vect_init_vector (stmt, mask, masktype, NULL);
6134 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6136 REAL_VALUE_TYPE r;
6137 long tmp[6];
6138 for (j = 0; j < 6; ++j)
6139 tmp[j] = -1;
6140 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6141 mask = build_real (TREE_TYPE (masktype), r);
6142 mask = build_vector_from_val (masktype, mask);
6143 mask = vect_init_vector (stmt, mask, masktype, NULL);
6145 else
6146 gcc_unreachable ();
6148 scale = build_int_cst (scaletype, gather_scale);
6150 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6151 merge = build_int_cst (TREE_TYPE (rettype), 0);
6152 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6154 REAL_VALUE_TYPE r;
6155 long tmp[6];
6156 for (j = 0; j < 6; ++j)
6157 tmp[j] = 0;
6158 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6159 merge = build_real (TREE_TYPE (rettype), r);
6161 else
6162 gcc_unreachable ();
6163 merge = build_vector_from_val (rettype, merge);
6164 merge = vect_init_vector (stmt, merge, rettype, NULL);
6166 prev_stmt_info = NULL;
6167 for (j = 0; j < ncopies; ++j)
6169 if (modifier == WIDEN && (j & 1))
6170 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6171 perm_mask, stmt, gsi);
6172 else if (j == 0)
6173 op = vec_oprnd0
6174 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6175 else
6176 op = vec_oprnd0
6177 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6179 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6181 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6182 == TYPE_VECTOR_SUBPARTS (idxtype));
6183 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6184 var = make_ssa_name (var);
6185 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6186 new_stmt
6187 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6189 op = var;
6192 new_stmt
6193 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6195 if (!useless_type_conversion_p (vectype, rettype))
6197 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6198 == TYPE_VECTOR_SUBPARTS (rettype));
6199 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6200 op = make_ssa_name (var, new_stmt);
6201 gimple_call_set_lhs (new_stmt, op);
6202 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6203 var = make_ssa_name (vec_dest);
6204 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6205 new_stmt
6206 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6208 else
6210 var = make_ssa_name (vec_dest, new_stmt);
6211 gimple_call_set_lhs (new_stmt, var);
6214 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6216 if (modifier == NARROW)
6218 if ((j & 1) == 0)
6220 prev_res = var;
6221 continue;
6223 var = permute_vec_elements (prev_res, var,
6224 perm_mask, stmt, gsi);
6225 new_stmt = SSA_NAME_DEF_STMT (var);
6228 if (prev_stmt_info == NULL)
6229 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6230 else
6231 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6232 prev_stmt_info = vinfo_for_stmt (new_stmt);
6234 return true;
6236 else if (STMT_VINFO_STRIDED_P (stmt_info))
6238 gimple_stmt_iterator incr_gsi;
6239 bool insert_after;
6240 gimple incr;
6241 tree offvar;
6242 tree ivstep;
6243 tree running_off;
6244 vec<constructor_elt, va_gc> *v = NULL;
6245 gimple_seq stmts = NULL;
6246 tree stride_base, stride_step, alias_off;
6248 gcc_assert (!nested_in_vect_loop);
6250 stride_base
6251 = fold_build_pointer_plus
6252 (unshare_expr (DR_BASE_ADDRESS (dr)),
6253 size_binop (PLUS_EXPR,
6254 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6255 convert_to_ptrofftype (DR_INIT (dr))));
6256 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6258 /* For a load with loop-invariant (but other than power-of-2)
6259 stride (i.e. not a grouped access) like so:
6261 for (i = 0; i < n; i += stride)
6262 ... = array[i];
6264 we generate a new induction variable and new accesses to
6265 form a new vector (or vectors, depending on ncopies):
6267 for (j = 0; ; j += VF*stride)
6268 tmp1 = array[j];
6269 tmp2 = array[j + stride];
6271 vectemp = {tmp1, tmp2, ...}
6274 ivstep = stride_step;
6275 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6276 build_int_cst (TREE_TYPE (ivstep), vf));
6278 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6280 create_iv (stride_base, ivstep, NULL,
6281 loop, &incr_gsi, insert_after,
6282 &offvar, NULL);
6283 incr = gsi_stmt (incr_gsi);
6284 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6286 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6287 if (stmts)
6288 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6290 prev_stmt_info = NULL;
6291 running_off = offvar;
6292 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6293 int nloads = nunits;
6294 tree ltype = TREE_TYPE (vectype);
6295 auto_vec<tree> dr_chain;
6296 if (slp)
6298 nloads = nunits / group_size;
6299 if (group_size < nunits)
6300 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6301 else
6302 ltype = vectype;
6303 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6304 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6305 if (slp_perm)
6306 dr_chain.create (ncopies);
6308 for (j = 0; j < ncopies; j++)
6310 tree vec_inv;
6312 if (nloads > 1)
6314 vec_alloc (v, nloads);
6315 for (i = 0; i < nloads; i++)
6317 tree newref, newoff;
6318 gimple incr;
6319 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6321 newref = force_gimple_operand_gsi (gsi, newref, true,
6322 NULL_TREE, true,
6323 GSI_SAME_STMT);
6324 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6325 newoff = copy_ssa_name (running_off);
6326 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6327 running_off, stride_step);
6328 vect_finish_stmt_generation (stmt, incr, gsi);
6330 running_off = newoff;
6333 vec_inv = build_constructor (vectype, v);
6334 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6335 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6337 else
6339 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6340 build2 (MEM_REF, ltype,
6341 running_off, alias_off));
6342 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6344 tree newoff = copy_ssa_name (running_off);
6345 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6346 running_off, stride_step);
6347 vect_finish_stmt_generation (stmt, incr, gsi);
6349 running_off = newoff;
6352 if (slp)
6354 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6355 if (slp_perm)
6356 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6358 if (j == 0)
6359 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6360 else
6361 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6362 prev_stmt_info = vinfo_for_stmt (new_stmt);
6364 if (slp_perm)
6365 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6366 slp_node_instance, false);
6367 return true;
6370 if (grouped_load)
6372 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6373 if (slp
6374 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6375 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6376 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6378 /* Check if the chain of loads is already vectorized. */
6379 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6380 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6381 ??? But we can only do so if there is exactly one
6382 as we have no way to get at the rest. Leave the CSE
6383 opportunity alone.
6384 ??? With the group load eventually participating
6385 in multiple different permutations (having multiple
6386 slp nodes which refer to the same group) the CSE
6387 is even wrong code. See PR56270. */
6388 && !slp)
6390 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6391 return true;
6393 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6394 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6395 group_gap_adj = 0;
6397 /* VEC_NUM is the number of vect stmts to be created for this group. */
6398 if (slp)
6400 grouped_load = false;
6401 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6402 group_gap_adj = vf * group_size - nunits * vec_num;
6404 else
6405 vec_num = group_size;
6407 else
6409 first_stmt = stmt;
6410 first_dr = dr;
6411 group_size = vec_num = 1;
6412 group_gap_adj = 0;
6415 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6416 gcc_assert (alignment_support_scheme);
6417 /* Targets with load-lane instructions must not require explicit
6418 realignment. */
6419 gcc_assert (!load_lanes_p
6420 || alignment_support_scheme == dr_aligned
6421 || alignment_support_scheme == dr_unaligned_supported);
6423 /* In case the vectorization factor (VF) is bigger than the number
6424 of elements that we can fit in a vectype (nunits), we have to generate
6425 more than one vector stmt - i.e. we need to "unroll" the
6426 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6427 from one copy of the vector stmt to the next, in the field
6428 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6429 stages to find the correct vector defs to be used when vectorizing
6430 stmts that use the defs of the current stmt. The example below
6431 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6432 need to create 4 vectorized stmts):
6434 before vectorization:
6435 RELATED_STMT VEC_STMT
6436 S1: x = memref - -
6437 S2: z = x + 1 - -
6439 step 1: vectorize stmt S1:
6440 We first create the vector stmt VS1_0, and, as usual, record a
6441 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6442 Next, we create the vector stmt VS1_1, and record a pointer to
6443 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6444 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6445 stmts and pointers:
6446 RELATED_STMT VEC_STMT
6447 VS1_0: vx0 = memref0 VS1_1 -
6448 VS1_1: vx1 = memref1 VS1_2 -
6449 VS1_2: vx2 = memref2 VS1_3 -
6450 VS1_3: vx3 = memref3 - -
6451 S1: x = load - VS1_0
6452 S2: z = x + 1 - -
6454 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6455 information we recorded in RELATED_STMT field is used to vectorize
6456 stmt S2. */
6458 /* In case of interleaving (non-unit grouped access):
6460 S1: x2 = &base + 2
6461 S2: x0 = &base
6462 S3: x1 = &base + 1
6463 S4: x3 = &base + 3
6465 Vectorized loads are created in the order of memory accesses
6466 starting from the access of the first stmt of the chain:
6468 VS1: vx0 = &base
6469 VS2: vx1 = &base + vec_size*1
6470 VS3: vx3 = &base + vec_size*2
6471 VS4: vx4 = &base + vec_size*3
6473 Then permutation statements are generated:
6475 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6476 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6479 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6480 (the order of the data-refs in the output of vect_permute_load_chain
6481 corresponds to the order of scalar stmts in the interleaving chain - see
6482 the documentation of vect_permute_load_chain()).
6483 The generation of permutation stmts and recording them in
6484 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6486 In case of both multiple types and interleaving, the vector loads and
6487 permutation stmts above are created for every copy. The result vector
6488 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6489 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6491 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6492 on a target that supports unaligned accesses (dr_unaligned_supported)
6493 we generate the following code:
6494 p = initial_addr;
6495 indx = 0;
6496 loop {
6497 p = p + indx * vectype_size;
6498 vec_dest = *(p);
6499 indx = indx + 1;
6502 Otherwise, the data reference is potentially unaligned on a target that
6503 does not support unaligned accesses (dr_explicit_realign_optimized) -
6504 then generate the following code, in which the data in each iteration is
6505 obtained by two vector loads, one from the previous iteration, and one
6506 from the current iteration:
6507 p1 = initial_addr;
6508 msq_init = *(floor(p1))
6509 p2 = initial_addr + VS - 1;
6510 realignment_token = call target_builtin;
6511 indx = 0;
6512 loop {
6513 p2 = p2 + indx * vectype_size
6514 lsq = *(floor(p2))
6515 vec_dest = realign_load (msq, lsq, realignment_token)
6516 indx = indx + 1;
6517 msq = lsq;
6518 } */
6520 /* If the misalignment remains the same throughout the execution of the
6521 loop, we can create the init_addr and permutation mask at the loop
6522 preheader. Otherwise, it needs to be created inside the loop.
6523 This can only occur when vectorizing memory accesses in the inner-loop
6524 nested within an outer-loop that is being vectorized. */
6526 if (nested_in_vect_loop
6527 && (TREE_INT_CST_LOW (DR_STEP (dr))
6528 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6530 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6531 compute_in_loop = true;
6534 if ((alignment_support_scheme == dr_explicit_realign_optimized
6535 || alignment_support_scheme == dr_explicit_realign)
6536 && !compute_in_loop)
6538 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6539 alignment_support_scheme, NULL_TREE,
6540 &at_loop);
6541 if (alignment_support_scheme == dr_explicit_realign_optimized)
6543 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6544 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6545 size_one_node);
6548 else
6549 at_loop = loop;
6551 if (negative)
6552 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6554 if (load_lanes_p)
6555 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6556 else
6557 aggr_type = vectype;
6559 prev_stmt_info = NULL;
6560 for (j = 0; j < ncopies; j++)
6562 /* 1. Create the vector or array pointer update chain. */
6563 if (j == 0)
6565 bool simd_lane_access_p
6566 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6567 if (simd_lane_access_p
6568 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6569 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6570 && integer_zerop (DR_OFFSET (first_dr))
6571 && integer_zerop (DR_INIT (first_dr))
6572 && alias_sets_conflict_p (get_alias_set (aggr_type),
6573 get_alias_set (DR_REF (first_dr)))
6574 && (alignment_support_scheme == dr_aligned
6575 || alignment_support_scheme == dr_unaligned_supported))
6577 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6578 dataref_offset = build_int_cst (reference_alias_ptr_type
6579 (DR_REF (first_dr)), 0);
6580 inv_p = false;
6582 else
6583 dataref_ptr
6584 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6585 offset, &dummy, gsi, &ptr_incr,
6586 simd_lane_access_p, &inv_p,
6587 byte_offset);
6589 else if (dataref_offset)
6590 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6591 TYPE_SIZE_UNIT (aggr_type));
6592 else
6593 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6594 TYPE_SIZE_UNIT (aggr_type));
6596 if (grouped_load || slp_perm)
6597 dr_chain.create (vec_num);
6599 if (load_lanes_p)
6601 tree vec_array;
6603 vec_array = create_vector_array (vectype, vec_num);
6605 /* Emit:
6606 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6607 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6608 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6609 gimple_call_set_lhs (new_stmt, vec_array);
6610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6612 /* Extract each vector into an SSA_NAME. */
6613 for (i = 0; i < vec_num; i++)
6615 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6616 vec_array, i);
6617 dr_chain.quick_push (new_temp);
6620 /* Record the mapping between SSA_NAMEs and statements. */
6621 vect_record_grouped_load_vectors (stmt, dr_chain);
6623 else
6625 for (i = 0; i < vec_num; i++)
6627 if (i > 0)
6628 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6629 stmt, NULL_TREE);
6631 /* 2. Create the vector-load in the loop. */
6632 switch (alignment_support_scheme)
6634 case dr_aligned:
6635 case dr_unaligned_supported:
6637 unsigned int align, misalign;
6639 data_ref
6640 = build2 (MEM_REF, vectype, dataref_ptr,
6641 dataref_offset
6642 ? dataref_offset
6643 : build_int_cst (reference_alias_ptr_type
6644 (DR_REF (first_dr)), 0));
6645 align = TYPE_ALIGN_UNIT (vectype);
6646 if (alignment_support_scheme == dr_aligned)
6648 gcc_assert (aligned_access_p (first_dr));
6649 misalign = 0;
6651 else if (DR_MISALIGNMENT (first_dr) == -1)
6653 TREE_TYPE (data_ref)
6654 = build_aligned_type (TREE_TYPE (data_ref),
6655 TYPE_ALIGN (elem_type));
6656 align = TYPE_ALIGN_UNIT (elem_type);
6657 misalign = 0;
6659 else
6661 TREE_TYPE (data_ref)
6662 = build_aligned_type (TREE_TYPE (data_ref),
6663 TYPE_ALIGN (elem_type));
6664 misalign = DR_MISALIGNMENT (first_dr);
6666 if (dataref_offset == NULL_TREE)
6667 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6668 align, misalign);
6669 break;
6671 case dr_explicit_realign:
6673 tree ptr, bump;
6675 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6677 if (compute_in_loop)
6678 msq = vect_setup_realignment (first_stmt, gsi,
6679 &realignment_token,
6680 dr_explicit_realign,
6681 dataref_ptr, NULL);
6683 ptr = copy_ssa_name (dataref_ptr);
6684 new_stmt = gimple_build_assign
6685 (ptr, BIT_AND_EXPR, dataref_ptr,
6686 build_int_cst
6687 (TREE_TYPE (dataref_ptr),
6688 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6689 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6690 data_ref
6691 = build2 (MEM_REF, vectype, ptr,
6692 build_int_cst (reference_alias_ptr_type
6693 (DR_REF (first_dr)), 0));
6694 vec_dest = vect_create_destination_var (scalar_dest,
6695 vectype);
6696 new_stmt = gimple_build_assign (vec_dest, data_ref);
6697 new_temp = make_ssa_name (vec_dest, new_stmt);
6698 gimple_assign_set_lhs (new_stmt, new_temp);
6699 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6700 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6701 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6702 msq = new_temp;
6704 bump = size_binop (MULT_EXPR, vs,
6705 TYPE_SIZE_UNIT (elem_type));
6706 bump = size_binop (MINUS_EXPR, bump, size_one_node);
6707 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6708 new_stmt = gimple_build_assign
6709 (NULL_TREE, BIT_AND_EXPR, ptr,
6710 build_int_cst
6711 (TREE_TYPE (ptr),
6712 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6713 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6714 gimple_assign_set_lhs (new_stmt, ptr);
6715 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6716 data_ref
6717 = build2 (MEM_REF, vectype, ptr,
6718 build_int_cst (reference_alias_ptr_type
6719 (DR_REF (first_dr)), 0));
6720 break;
6722 case dr_explicit_realign_optimized:
6723 new_temp = copy_ssa_name (dataref_ptr);
6724 new_stmt = gimple_build_assign
6725 (new_temp, BIT_AND_EXPR, dataref_ptr,
6726 build_int_cst
6727 (TREE_TYPE (dataref_ptr),
6728 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6729 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6730 data_ref
6731 = build2 (MEM_REF, vectype, new_temp,
6732 build_int_cst (reference_alias_ptr_type
6733 (DR_REF (first_dr)), 0));
6734 break;
6735 default:
6736 gcc_unreachable ();
6738 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6739 new_stmt = gimple_build_assign (vec_dest, data_ref);
6740 new_temp = make_ssa_name (vec_dest, new_stmt);
6741 gimple_assign_set_lhs (new_stmt, new_temp);
6742 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6744 /* 3. Handle explicit realignment if necessary/supported.
6745 Create in loop:
6746 vec_dest = realign_load (msq, lsq, realignment_token) */
6747 if (alignment_support_scheme == dr_explicit_realign_optimized
6748 || alignment_support_scheme == dr_explicit_realign)
6750 lsq = gimple_assign_lhs (new_stmt);
6751 if (!realignment_token)
6752 realignment_token = dataref_ptr;
6753 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6754 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6755 msq, lsq, realignment_token);
6756 new_temp = make_ssa_name (vec_dest, new_stmt);
6757 gimple_assign_set_lhs (new_stmt, new_temp);
6758 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6760 if (alignment_support_scheme == dr_explicit_realign_optimized)
6762 gcc_assert (phi);
6763 if (i == vec_num - 1 && j == ncopies - 1)
6764 add_phi_arg (phi, lsq,
6765 loop_latch_edge (containing_loop),
6766 UNKNOWN_LOCATION);
6767 msq = lsq;
6771 /* 4. Handle invariant-load. */
6772 if (inv_p && !bb_vinfo)
6774 gcc_assert (!grouped_load);
6775 /* If we have versioned for aliasing or the loop doesn't
6776 have any data dependencies that would preclude this,
6777 then we are sure this is a loop invariant load and
6778 thus we can insert it on the preheader edge. */
6779 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6780 && !nested_in_vect_loop
6781 && hoist_defs_of_uses (stmt, loop))
6783 if (dump_enabled_p ())
6785 dump_printf_loc (MSG_NOTE, vect_location,
6786 "hoisting out of the vectorized "
6787 "loop: ");
6788 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6790 tree tem = copy_ssa_name (scalar_dest);
6791 gsi_insert_on_edge_immediate
6792 (loop_preheader_edge (loop),
6793 gimple_build_assign (tem,
6794 unshare_expr
6795 (gimple_assign_rhs1 (stmt))));
6796 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6798 else
6800 gimple_stmt_iterator gsi2 = *gsi;
6801 gsi_next (&gsi2);
6802 new_temp = vect_init_vector (stmt, scalar_dest,
6803 vectype, &gsi2);
6805 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6806 set_vinfo_for_stmt (new_stmt,
6807 new_stmt_vec_info (new_stmt, loop_vinfo,
6808 bb_vinfo));
6811 if (negative)
6813 tree perm_mask = perm_mask_for_reverse (vectype);
6814 new_temp = permute_vec_elements (new_temp, new_temp,
6815 perm_mask, stmt, gsi);
6816 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6819 /* Collect vector loads and later create their permutation in
6820 vect_transform_grouped_load (). */
6821 if (grouped_load || slp_perm)
6822 dr_chain.quick_push (new_temp);
6824 /* Store vector loads in the corresponding SLP_NODE. */
6825 if (slp && !slp_perm)
6826 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6828 /* Bump the vector pointer to account for a gap or for excess
6829 elements loaded for a permuted SLP load. */
6830 if (group_gap_adj != 0)
6832 bool ovf;
6833 tree bump
6834 = wide_int_to_tree (sizetype,
6835 wi::smul (TYPE_SIZE_UNIT (elem_type),
6836 group_gap_adj, &ovf));
6837 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6838 stmt, bump);
6842 if (slp && !slp_perm)
6843 continue;
6845 if (slp_perm)
6847 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6848 slp_node_instance, false))
6850 dr_chain.release ();
6851 return false;
6854 else
6856 if (grouped_load)
6858 if (!load_lanes_p)
6859 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6860 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6862 else
6864 if (j == 0)
6865 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6866 else
6867 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6868 prev_stmt_info = vinfo_for_stmt (new_stmt);
6871 dr_chain.release ();
6874 return true;
6877 /* Function vect_is_simple_cond.
6879 Input:
6880 LOOP - the loop that is being vectorized.
6881 COND - Condition that is checked for simple use.
6883 Output:
6884 *COMP_VECTYPE - the vector type for the comparison.
6886 Returns whether a COND can be vectorized. Checks whether
6887 condition operands are supportable using vect_is_simple_use. */
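/* Illustrative example (hypothetical operands): a condition such as
   a_5 < b_7, where both operands have vectorizable defs, is simple and
   *COMP_VECTYPE is taken from whichever operand is an SSA_NAME; operands
   that are neither SSA_NAMEs nor integer/real/fixed constants cause the
   condition to be rejected.  */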
6889 static bool
6890 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6891 bb_vec_info bb_vinfo, tree *comp_vectype)
6893 tree lhs, rhs;
6894 tree def;
6895 enum vect_def_type dt;
6896 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6898 if (!COMPARISON_CLASS_P (cond))
6899 return false;
6901 lhs = TREE_OPERAND (cond, 0);
6902 rhs = TREE_OPERAND (cond, 1);
6904 if (TREE_CODE (lhs) == SSA_NAME)
6906 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6907 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6908 &lhs_def_stmt, &def, &dt, &vectype1))
6909 return false;
6911 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6912 && TREE_CODE (lhs) != FIXED_CST)
6913 return false;
6915 if (TREE_CODE (rhs) == SSA_NAME)
6917 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6918 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6919 &rhs_def_stmt, &def, &dt, &vectype2))
6920 return false;
6922 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6923 && TREE_CODE (rhs) != FIXED_CST)
6924 return false;
6926 *comp_vectype = vectype1 ? vectype1 : vectype2;
6927 return true;
6930 /* vectorizable_condition.
6932 Check if STMT is a conditional modify expression that can be vectorized.
6933 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6934 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6935 at GSI.
6937 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6938 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6939 the else clause if it is 2).
6941 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
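/* Illustrative sketch (assumed GIMPLE names): a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by something like

     vect_x_1.7 = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

   where the comparison uses a signed integer vector type of the same
   size as VECTYPE (vec_cmp_type below).  */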
6943 bool
6944 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6945 gimple *vec_stmt, tree reduc_def, int reduc_index,
6946 slp_tree slp_node)
6948 tree scalar_dest = NULL_TREE;
6949 tree vec_dest = NULL_TREE;
6950 tree cond_expr, then_clause, else_clause;
6951 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6952 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6953 tree comp_vectype = NULL_TREE;
6954 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6955 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6956 tree vec_compare, vec_cond_expr;
6957 tree new_temp;
6958 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6959 tree def;
6960 enum vect_def_type dt, dts[4];
6961 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6962 int ncopies;
6963 enum tree_code code;
6964 stmt_vec_info prev_stmt_info = NULL;
6965 int i, j;
6966 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6967 vec<tree> vec_oprnds0 = vNULL;
6968 vec<tree> vec_oprnds1 = vNULL;
6969 vec<tree> vec_oprnds2 = vNULL;
6970 vec<tree> vec_oprnds3 = vNULL;
6971 tree vec_cmp_type;
6973 if (slp_node || PURE_SLP_STMT (stmt_info))
6974 ncopies = 1;
6975 else
6976 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6978 gcc_assert (ncopies >= 1);
6979 if (reduc_index && ncopies > 1)
6980 return false; /* FORNOW */
6982 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6983 return false;
6985 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6986 return false;
6988 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6989 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6990 && reduc_def))
6991 return false;
6993 /* FORNOW: not yet supported. */
6994 if (STMT_VINFO_LIVE_P (stmt_info))
6996 if (dump_enabled_p ())
6997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6998 "value used after loop.\n");
6999 return false;
7002 /* Is vectorizable conditional operation? */
7003 if (!is_gimple_assign (stmt))
7004 return false;
7006 code = gimple_assign_rhs_code (stmt);
7008 if (code != COND_EXPR)
7009 return false;
7011 cond_expr = gimple_assign_rhs1 (stmt);
7012 then_clause = gimple_assign_rhs2 (stmt);
7013 else_clause = gimple_assign_rhs3 (stmt);
7015 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
7016 &comp_vectype)
7017 || !comp_vectype)
7018 return false;
7020 if (TREE_CODE (then_clause) == SSA_NAME)
7022 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7023 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7024 &then_def_stmt, &def, &dt))
7025 return false;
7027 else if (TREE_CODE (then_clause) != INTEGER_CST
7028 && TREE_CODE (then_clause) != REAL_CST
7029 && TREE_CODE (then_clause) != FIXED_CST)
7030 return false;
7032 if (TREE_CODE (else_clause) == SSA_NAME)
7034 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7035 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7036 &else_def_stmt, &def, &dt))
7037 return false;
7039 else if (TREE_CODE (else_clause) != INTEGER_CST
7040 && TREE_CODE (else_clause) != REAL_CST
7041 && TREE_CODE (else_clause) != FIXED_CST)
7042 return false;
7044 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7045 /* The result of a vector comparison should be of signed integer type. */
7046 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7047 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7048 if (vec_cmp_type == NULL_TREE)
7049 return false;
7051 if (!vec_stmt)
7053 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7054 return expand_vec_cond_expr_p (vectype, comp_vectype);
7057 /* Transform. */
7059 if (!slp_node)
7061 vec_oprnds0.create (1);
7062 vec_oprnds1.create (1);
7063 vec_oprnds2.create (1);
7064 vec_oprnds3.create (1);
7067 /* Handle def. */
7068 scalar_dest = gimple_assign_lhs (stmt);
7069 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7071 /* Handle cond expr. */
7072 for (j = 0; j < ncopies; j++)
7074 gassign *new_stmt = NULL;
7075 if (j == 0)
7077 if (slp_node)
7079 auto_vec<tree, 4> ops;
7080 auto_vec<vec<tree>, 4> vec_defs;
7082 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7083 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7084 ops.safe_push (then_clause);
7085 ops.safe_push (else_clause);
7086 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7087 vec_oprnds3 = vec_defs.pop ();
7088 vec_oprnds2 = vec_defs.pop ();
7089 vec_oprnds1 = vec_defs.pop ();
7090 vec_oprnds0 = vec_defs.pop ();
7092 ops.release ();
7093 vec_defs.release ();
7095 else
7097 gimple gtemp;
7098 vec_cond_lhs =
7099 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7100 stmt, NULL);
7101 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7102 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7104 vec_cond_rhs =
7105 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7106 stmt, NULL);
7107 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7108 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7109 if (reduc_index == 1)
7110 vec_then_clause = reduc_def;
7111 else
7113 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7114 stmt, NULL);
7115 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7116 NULL, &gtemp, &def, &dts[2]);
7118 if (reduc_index == 2)
7119 vec_else_clause = reduc_def;
7120 else
7122 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7123 stmt, NULL);
7124 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7125 NULL, &gtemp, &def, &dts[3]);
7129 else
7131 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7132 vec_oprnds0.pop ());
7133 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7134 vec_oprnds1.pop ());
7135 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7136 vec_oprnds2.pop ());
7137 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7138 vec_oprnds3.pop ());
7141 if (!slp_node)
7143 vec_oprnds0.quick_push (vec_cond_lhs);
7144 vec_oprnds1.quick_push (vec_cond_rhs);
7145 vec_oprnds2.quick_push (vec_then_clause);
7146 vec_oprnds3.quick_push (vec_else_clause);
7149 /* Arguments are ready. Create the new vector stmt. */
7150 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7152 vec_cond_rhs = vec_oprnds1[i];
7153 vec_then_clause = vec_oprnds2[i];
7154 vec_else_clause = vec_oprnds3[i];
7156 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7157 vec_cond_lhs, vec_cond_rhs);
7158 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7159 vec_compare, vec_then_clause, vec_else_clause);
7161 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7162 new_temp = make_ssa_name (vec_dest, new_stmt);
7163 gimple_assign_set_lhs (new_stmt, new_temp);
7164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7165 if (slp_node)
7166 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7169 if (slp_node)
7170 continue;
7172 if (j == 0)
7173 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7174 else
7175 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7177 prev_stmt_info = vinfo_for_stmt (new_stmt);
7180 vec_oprnds0.release ();
7181 vec_oprnds1.release ();
7182 vec_oprnds2.release ();
7183 vec_oprnds3.release ();
7185 return true;
7189 /* Make sure the statement is vectorizable. */
7191 bool
7192 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7194 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7195 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7196 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7197 bool ok;
7198 tree scalar_type, vectype;
7199 gimple pattern_stmt;
7200 gimple_seq pattern_def_seq;
7202 if (dump_enabled_p ())
7204 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7205 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7208 if (gimple_has_volatile_ops (stmt))
7210 if (dump_enabled_p ())
7211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7212 "not vectorized: stmt has volatile operands\n");
7214 return false;
7217 /* Skip stmts that do not need to be vectorized. In loops this is expected
7218 to include:
7219 - the COND_EXPR which is the loop exit condition
7220 - any LABEL_EXPRs in the loop
7221 - computations that are used only for array indexing or loop control.
7222 In basic blocks we only analyze statements that are a part of some SLP
7223 instance, therefore, all the statements are relevant.
7225 The pattern statement needs to be analyzed instead of the original statement
7226 if the original statement is not relevant. Otherwise, we analyze both
7227 statements. In basic blocks we are called from some SLP instance
7228 traversal; don't analyze pattern stmts instead, since the pattern stmts
7229 will already be part of the SLP instance. */
7231 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7232 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7233 && !STMT_VINFO_LIVE_P (stmt_info))
7235 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7236 && pattern_stmt
7237 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7238 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7240 /* Analyze PATTERN_STMT instead of the original stmt. */
7241 stmt = pattern_stmt;
7242 stmt_info = vinfo_for_stmt (pattern_stmt);
7243 if (dump_enabled_p ())
7245 dump_printf_loc (MSG_NOTE, vect_location,
7246 "==> examining pattern statement: ");
7247 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7250 else
7252 if (dump_enabled_p ())
7253 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7255 return true;
7258 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7259 && node == NULL
7260 && pattern_stmt
7261 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7262 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7264 /* Analyze PATTERN_STMT too. */
7265 if (dump_enabled_p ())
7267 dump_printf_loc (MSG_NOTE, vect_location,
7268 "==> examining pattern statement: ");
7269 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7272 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7273 return false;
7276 if (is_pattern_stmt_p (stmt_info)
7277 && node == NULL
7278 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7280 gimple_stmt_iterator si;
7282 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7284 gimple pattern_def_stmt = gsi_stmt (si);
7285 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7286 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7288 /* Analyze def stmt of STMT if it's a pattern stmt. */
7289 if (dump_enabled_p ())
7291 dump_printf_loc (MSG_NOTE, vect_location,
7292 "==> examining pattern def statement: ");
7293 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7296 if (!vect_analyze_stmt (pattern_def_stmt,
7297 need_to_vectorize, node))
7298 return false;
7303 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7305 case vect_internal_def:
7306 break;
7308 case vect_reduction_def:
7309 case vect_nested_cycle:
7310 gcc_assert (!bb_vinfo
7311 && (relevance == vect_used_in_outer
7312 || relevance == vect_used_in_outer_by_reduction
7313 || relevance == vect_used_by_reduction
7314 || relevance == vect_unused_in_scope));
7315 break;
7317 case vect_induction_def:
7318 case vect_constant_def:
7319 case vect_external_def:
7320 case vect_unknown_def_type:
7321 default:
7322 gcc_unreachable ();
7325 if (bb_vinfo)
7327 gcc_assert (PURE_SLP_STMT (stmt_info));
7329 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7330 if (dump_enabled_p ())
7332 dump_printf_loc (MSG_NOTE, vect_location,
7333 "get vectype for scalar type: ");
7334 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7335 dump_printf (MSG_NOTE, "\n");
7338 vectype = get_vectype_for_scalar_type (scalar_type);
7339 if (!vectype)
7341 if (dump_enabled_p ())
7343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7344 "not SLPed: unsupported data-type ");
7345 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7346 scalar_type);
7347 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7349 return false;
7352 if (dump_enabled_p ())
7354 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7355 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7356 dump_printf (MSG_NOTE, "\n");
7359 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7362 if (STMT_VINFO_RELEVANT_P (stmt_info))
7364 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7365 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7366 || (is_gimple_call (stmt)
7367 && gimple_call_lhs (stmt) == NULL_TREE));
7368 *need_to_vectorize = true;
7371 if (PURE_SLP_STMT (stmt_info) && !node)
7373 dump_printf_loc (MSG_NOTE, vect_location,
7374 "handled only by SLP analysis\n");
7375 return true;
7378 ok = true;
7379 if (!bb_vinfo
7380 && (STMT_VINFO_RELEVANT_P (stmt_info)
7381 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7382 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7383 || vectorizable_conversion (stmt, NULL, NULL, node)
7384 || vectorizable_shift (stmt, NULL, NULL, node)
7385 || vectorizable_operation (stmt, NULL, NULL, node)
7386 || vectorizable_assignment (stmt, NULL, NULL, node)
7387 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7388 || vectorizable_call (stmt, NULL, NULL, node)
7389 || vectorizable_store (stmt, NULL, NULL, node)
7390 || vectorizable_reduction (stmt, NULL, NULL, node)
7391 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7392 else
7394 if (bb_vinfo)
7395 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7396 || vectorizable_conversion (stmt, NULL, NULL, node)
7397 || vectorizable_shift (stmt, NULL, NULL, node)
7398 || vectorizable_operation (stmt, NULL, NULL, node)
7399 || vectorizable_assignment (stmt, NULL, NULL, node)
7400 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7401 || vectorizable_call (stmt, NULL, NULL, node)
7402 || vectorizable_store (stmt, NULL, NULL, node)
7403 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7406 if (!ok)
7408 if (dump_enabled_p ())
7410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7411 "not vectorized: relevant stmt not ");
7412 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7413 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7416 return false;
7419 if (bb_vinfo)
7420 return true;
7422 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
7423 need extra handling, except for vectorizable reductions. */
7424 if (STMT_VINFO_LIVE_P (stmt_info)
7425 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7426 ok = vectorizable_live_operation (stmt, NULL, NULL);
7428 if (!ok)
7430 if (dump_enabled_p ())
7432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7433 "not vectorized: live stmt not ");
7434 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7435 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7438 return false;
7441 return true;
7445 /* Function vect_transform_stmt.
7447 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7449 bool
7450 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7451 bool *grouped_store, slp_tree slp_node,
7452 slp_instance slp_node_instance)
7454 bool is_store = false;
7455 gimple vec_stmt = NULL;
7456 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7457 bool done;
7459 switch (STMT_VINFO_TYPE (stmt_info))
7461 case type_demotion_vec_info_type:
7462 case type_promotion_vec_info_type:
7463 case type_conversion_vec_info_type:
7464 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7465 gcc_assert (done);
7466 break;
7468 case induc_vec_info_type:
7469 gcc_assert (!slp_node);
7470 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7471 gcc_assert (done);
7472 break;
7474 case shift_vec_info_type:
7475 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7476 gcc_assert (done);
7477 break;
7479 case op_vec_info_type:
7480 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7481 gcc_assert (done);
7482 break;
7484 case assignment_vec_info_type:
7485 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7486 gcc_assert (done);
7487 break;
7489 case load_vec_info_type:
7490 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7491 slp_node_instance);
7492 gcc_assert (done);
7493 break;
7495 case store_vec_info_type:
7496 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7497 gcc_assert (done);
7498 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7500 /* In case of interleaving, the whole chain is vectorized when the
7501 last store in the chain is reached. Store stmts before the last
7502 one are skipped, and their vec_stmt_info shouldn't be freed
7503 meanwhile. */
7504 *grouped_store = true;
7505 if (STMT_VINFO_VEC_STMT (stmt_info))
7506 is_store = true;
7508 else
7509 is_store = true;
7510 break;
7512 case condition_vec_info_type:
7513 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7514 gcc_assert (done);
7515 break;
7517 case call_vec_info_type:
7518 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7519 stmt = gsi_stmt (*gsi);
7520 if (is_gimple_call (stmt)
7521 && gimple_call_internal_p (stmt)
7522 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7523 is_store = true;
7524 break;
7526 case call_simd_clone_vec_info_type:
7527 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7528 stmt = gsi_stmt (*gsi);
7529 break;
7531 case reduc_vec_info_type:
7532 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7533 gcc_assert (done);
7534 break;
7536 default:
7537 if (!STMT_VINFO_LIVE_P (stmt_info))
7539 if (dump_enabled_p ())
7540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7541 "stmt not supported.\n");
7542 gcc_unreachable ();
7546 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7547 is being vectorized, but outside the immediately enclosing loop. */
7548 if (vec_stmt
7549 && STMT_VINFO_LOOP_VINFO (stmt_info)
7550 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7551 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7552 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7553 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7554 || STMT_VINFO_RELEVANT (stmt_info) ==
7555 vect_used_in_outer_by_reduction))
7557 struct loop *innerloop = LOOP_VINFO_LOOP (
7558 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7559 imm_use_iterator imm_iter;
7560 use_operand_p use_p;
7561 tree scalar_dest;
7562 gimple exit_phi;
7564 if (dump_enabled_p ())
7565 dump_printf_loc (MSG_NOTE, vect_location,
7566 "Record the vdef for outer-loop vectorization.\n");
7568 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7569 (to be used when vectorizing outer-loop stmts that use the DEF of
7570 STMT). */
7571 if (gimple_code (stmt) == GIMPLE_PHI)
7572 scalar_dest = PHI_RESULT (stmt);
7573 else
7574 scalar_dest = gimple_assign_lhs (stmt);
7576 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7578 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7580 exit_phi = USE_STMT (use_p);
7581 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
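      /* As an illustration (hypothetical loop nest, names made up): when
         vectorizing the outer loop of

             for (i = 0; i < n; i++)
               {
                 for (j = 0; j < m; j++)
                   t = a[i] + 1;         <- inner-loop def
                 b[i] = t;               <- use in the outer loop
               }

         the vectorized definition of T is recorded on the inner loop's
         exit phi, so that the outer-loop use (the store to b[i]) can find
         the vector def when it is transformed later.  */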
7586 /* Handle stmts whose DEF is used outside the loop-nest that is
7587 being vectorized. */
7588 if (STMT_VINFO_LIVE_P (stmt_info)
7589 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7591 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7592 gcc_assert (done);
7595 if (vec_stmt)
7596 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7598 return is_store;
7602 /* Remove a group of stores (for SLP or interleaving), free their
7603 stmt_vec_info. */
7605 void
7606 vect_remove_stores (gimple first_stmt)
7608 gimple next = first_stmt;
7609 gimple tmp;
7610 gimple_stmt_iterator next_si;
7612 while (next)
7614 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7616 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7617 if (is_pattern_stmt_p (stmt_info))
7618 next = STMT_VINFO_RELATED_STMT (stmt_info);
7619 /* Free the attached stmt_vec_info and remove the stmt. */
7620 next_si = gsi_for_stmt (next);
7621 unlink_stmt_vdef (next);
7622 gsi_remove (&next_si, true);
7623 release_defs (next);
7624 free_stmt_vec_info (next);
7625 next = tmp;
7630 /* Function new_stmt_vec_info.
7632 Create and initialize a new stmt_vec_info struct for STMT. */
7634 stmt_vec_info
7635 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7636 bb_vec_info bb_vinfo)
7638 stmt_vec_info res;
7639 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7641 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7642 STMT_VINFO_STMT (res) = stmt;
7643 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7644 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7645 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7646 STMT_VINFO_LIVE_P (res) = false;
7647 STMT_VINFO_VECTYPE (res) = NULL;
7648 STMT_VINFO_VEC_STMT (res) = NULL;
7649 STMT_VINFO_VECTORIZABLE (res) = true;
7650 STMT_VINFO_IN_PATTERN_P (res) = false;
7651 STMT_VINFO_RELATED_STMT (res) = NULL;
7652 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7653 STMT_VINFO_DATA_REF (res) = NULL;
7655 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7656 STMT_VINFO_DR_OFFSET (res) = NULL;
7657 STMT_VINFO_DR_INIT (res) = NULL;
7658 STMT_VINFO_DR_STEP (res) = NULL;
7659 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7661 if (gimple_code (stmt) == GIMPLE_PHI
7662 && is_loop_header_bb_p (gimple_bb (stmt)))
7663 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7664 else
7665 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7667 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7668 STMT_SLP_TYPE (res) = loop_vect;
7669 GROUP_FIRST_ELEMENT (res) = NULL;
7670 GROUP_NEXT_ELEMENT (res) = NULL;
7671 GROUP_SIZE (res) = 0;
7672 GROUP_STORE_COUNT (res) = 0;
7673 GROUP_GAP (res) = 0;
7674 GROUP_SAME_DR_STMT (res) = NULL;
7676 return res;
7680 /* Create the stmt_vec_info vector. */
7682 void
7683 init_stmt_vec_info_vec (void)
7685 gcc_assert (!stmt_vec_info_vec.exists ());
7686 stmt_vec_info_vec.create (50);
7690 /* Free the stmt_vec_info vector. */
7692 void
7693 free_stmt_vec_info_vec (void)
7695 unsigned int i;
7696 vec_void_p info;
7697 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7698 if (info != NULL)
7699 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7700 gcc_assert (stmt_vec_info_vec.exists ());
7701 stmt_vec_info_vec.release ();
7705 /* Free stmt vectorization related info. */
7707 void
7708 free_stmt_vec_info (gimple stmt)
7710 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7712 if (!stmt_info)
7713 return;
7715 /* Check if this statement has a related "pattern stmt"
7716 (introduced by the vectorizer during the pattern recognition
7717 pass). Free the pattern's stmt_vec_info and the def stmt's stmt_vec_info
7718 too. */
7719 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7721 stmt_vec_info patt_info
7722 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7723 if (patt_info)
7725 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7726 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7727 gimple_set_bb (patt_stmt, NULL);
7728 tree lhs = gimple_get_lhs (patt_stmt);
7729 if (TREE_CODE (lhs) == SSA_NAME)
7730 release_ssa_name (lhs);
7731 if (seq)
7733 gimple_stmt_iterator si;
7734 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7736 gimple seq_stmt = gsi_stmt (si);
7737 gimple_set_bb (seq_stmt, NULL);
7738 lhs = gimple_get_lhs (seq_stmt);
7739 if (TREE_CODE (lhs) == SSA_NAME)
7740 release_ssa_name (lhs);
7741 free_stmt_vec_info (seq_stmt);
7744 free_stmt_vec_info (patt_stmt);
7748 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7749 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7750 set_vinfo_for_stmt (stmt, NULL);
7751 free (stmt_info);
7755 /* Function get_vectype_for_scalar_type_and_size.
7757 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7758 by the target. */
7760 static tree
7761 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7763 machine_mode inner_mode = TYPE_MODE (scalar_type);
7764 machine_mode simd_mode;
7765 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7766 int nunits;
7767 tree vectype;
7769 if (nbytes == 0)
7770 return NULL_TREE;
7772 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7773 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7774 return NULL_TREE;
7776 /* For vector types of elements whose mode precision doesn't
7777 match their type's precision we use an element type of mode
7778 precision. The vectorization routines will have to make sure
7779 they support the proper result truncation/extension.
7780 We also make sure to build vector types with INTEGER_TYPE
7781 component type only. */
7782 if (INTEGRAL_TYPE_P (scalar_type)
7783 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7784 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7785 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7786 TYPE_UNSIGNED (scalar_type));
7788 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7789 When the component mode passes the above test simply use a type
7790 corresponding to that mode. The theory is that any use that
7791 would cause problems with this will disable vectorization anyway. */
7792 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7793 && !INTEGRAL_TYPE_P (scalar_type))
7794 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7796 /* We can't build a vector type of elements with alignment bigger than
7797 their size. */
7798 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7799 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7800 TYPE_UNSIGNED (scalar_type));
7802 /* If we fell back to using the mode, fail if there was
7803 no scalar type for it. */
7804 if (scalar_type == NULL_TREE)
7805 return NULL_TREE;
7807 /* If no size was supplied, use the mode the target prefers. Otherwise
7808 look up a vector mode of the specified size. */
7809 if (size == 0)
7810 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7811 else
7812 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7813 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7814 if (nunits <= 1)
7815 return NULL_TREE;
7817 vectype = build_vector_type (scalar_type, nunits);
7819 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7820 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7821 return NULL_TREE;
7823 return vectype;
7826 unsigned int current_vector_size;
7828 /* Function get_vectype_for_scalar_type.
7830 Returns the vector type corresponding to SCALAR_TYPE as supported
7831 by the target. */
7833 tree
7834 get_vectype_for_scalar_type (tree scalar_type)
7836 tree vectype;
7837 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7838 current_vector_size);
7839 if (vectype
7840 && current_vector_size == 0)
7841 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7842 return vectype;
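/* As an illustration (hypothetical 128-bit SIMD target, numbers made up):
   with current_vector_size still 0, a first call such as
   get_vectype_for_scalar_type (integer_type_node) picks the target's
   preferred SIMD mode, say V4SImode, and latches current_vector_size to 16.
   Subsequent calls, e.g. for short or double, are then forced to the same
   16-byte size (V8HImode, V2DFmode), so all vector types used for one
   loop agree in size.  */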
7845 /* Function get_same_sized_vectype
7847 Returns a vector type corresponding to SCALAR_TYPE of size
7848 VECTOR_TYPE if supported by the target. */
7850 tree
7851 get_same_sized_vectype (tree scalar_type, tree vector_type)
7853 return get_vectype_for_scalar_type_and_size
7854 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7857 /* Function vect_is_simple_use.
7859 Input:
7860 LOOP_VINFO - the vect info of the loop that is being vectorized.
7861 BB_VINFO - the vect info of the basic block that is being vectorized.
7862 OPERAND - operand of STMT in the loop or bb.
7863 DEF_STMT, DEF - the defining stmt and the value it defines, in case OPERAND is an SSA_NAME.
7865 Returns whether a stmt with OPERAND can be vectorized.
7866 For loops, supportable operands are constants, loop invariants, and operands
7867 that are defined by the current iteration of the loop. Unsupportable
7868 operands are those that are defined by a previous iteration of the loop (as
7869 is the case in reduction/induction computations).
7870 For basic blocks, supportable operands are constants and bb invariants.
7871 For now, operands defined outside the basic block are not supported. */
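/* As an illustration (hypothetical caller, variable names made up), a
   typical query for one operand of a statement being analyzed looks like:

       gimple def_stmt;
       tree def;
       enum vect_def_type dt;

       if (!vect_is_simple_use (op0, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt))
         return false;

   after which DT tells the caller whether OP0 is a constant, an
   external/invariant value, or defined by a statement inside the region
   being vectorized.  */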
7873 bool
7874 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7875 bb_vec_info bb_vinfo, gimple *def_stmt,
7876 tree *def, enum vect_def_type *dt)
7878 *def_stmt = NULL;
7879 *def = NULL_TREE;
7880 *dt = vect_unknown_def_type;
7882 if (dump_enabled_p ())
7884 dump_printf_loc (MSG_NOTE, vect_location,
7885 "vect_is_simple_use: operand ");
7886 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7887 dump_printf (MSG_NOTE, "\n");
7890 if (CONSTANT_CLASS_P (operand))
7892 *dt = vect_constant_def;
7893 return true;
7896 if (is_gimple_min_invariant (operand))
7898 *def = operand;
7899 *dt = vect_external_def;
7900 return true;
7903 if (TREE_CODE (operand) != SSA_NAME)
7905 if (dump_enabled_p ())
7906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7907 "not ssa-name.\n");
7908 return false;
7911 if (SSA_NAME_IS_DEFAULT_DEF (operand))
7913 *def = operand;
7914 *dt = vect_external_def;
7915 return true;
7918 *def_stmt = SSA_NAME_DEF_STMT (operand);
7919 if (dump_enabled_p ())
7921 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7922 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7925 basic_block bb = gimple_bb (*def_stmt);
7926 if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
7927 || (bb_vinfo
7928 && (bb != BB_VINFO_BB (bb_vinfo)
7929 || gimple_code (*def_stmt) == GIMPLE_PHI)))
7930 *dt = vect_external_def;
7931 else
7933 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
7934 if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
7935 *dt = vect_external_def;
7936 else
7937 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7940 if (dump_enabled_p ())
7942 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
7943 switch (*dt)
7945 case vect_uninitialized_def:
7946 dump_printf (MSG_NOTE, "uninitialized\n");
7947 break;
7948 case vect_constant_def:
7949 dump_printf (MSG_NOTE, "constant\n");
7950 break;
7951 case vect_external_def:
7952 dump_printf (MSG_NOTE, "external\n");
7953 break;
7954 case vect_internal_def:
7955 dump_printf (MSG_NOTE, "internal\n");
7956 break;
7957 case vect_induction_def:
7958 dump_printf (MSG_NOTE, "induction\n");
7959 break;
7960 case vect_reduction_def:
7961 dump_printf (MSG_NOTE, "reduction\n");
7962 break;
7963 case vect_double_reduction_def:
7964 dump_printf (MSG_NOTE, "double reduction\n");
7965 break;
7966 case vect_nested_cycle:
7967 dump_printf (MSG_NOTE, "nested cycle\n");
7968 break;
7969 case vect_unknown_def_type:
7970 dump_printf (MSG_NOTE, "unknown\n");
7971 break;
7975 if (*dt == vect_unknown_def_type
7976 || (stmt
7977 && *dt == vect_double_reduction_def
7978 && gimple_code (stmt) != GIMPLE_PHI))
7980 if (dump_enabled_p ())
7981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7982 "Unsupported pattern.\n");
7983 return false;
7986 switch (gimple_code (*def_stmt))
7988 case GIMPLE_PHI:
7989 *def = gimple_phi_result (*def_stmt);
7990 break;
7992 case GIMPLE_ASSIGN:
7993 *def = gimple_assign_lhs (*def_stmt);
7994 break;
7996 case GIMPLE_CALL:
7997 *def = gimple_call_lhs (*def_stmt);
7998 if (*def != NULL)
7999 break;
8000 /* FALLTHRU */
8001 default:
8002 if (dump_enabled_p ())
8003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8004 "unsupported defining stmt:\n");
8005 return false;
8008 return true;
8011 /* Function vect_is_simple_use_1.
8013 Same as vect_is_simple_use but also determines the vector operand
8014 type of OPERAND and stores it to *VECTYPE. If the definition of
8015 OPERAND is vect_uninitialized_def, vect_constant_def or
8016 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8017 is responsible for computing the best suited vector type for the
8018 scalar operand. */
8020 bool
8021 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8022 bb_vec_info bb_vinfo, gimple *def_stmt,
8023 tree *def, enum vect_def_type *dt, tree *vectype)
8025 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8026 def, dt))
8027 return false;
8029 /* Now get a vector type if the def is internal, otherwise supply
8030 NULL_TREE and leave it up to the caller to figure out a proper
8031 type for the use stmt. */
8032 if (*dt == vect_internal_def
8033 || *dt == vect_induction_def
8034 || *dt == vect_reduction_def
8035 || *dt == vect_double_reduction_def
8036 || *dt == vect_nested_cycle)
8038 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8040 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8041 && !STMT_VINFO_RELEVANT (stmt_info)
8042 && !STMT_VINFO_LIVE_P (stmt_info))
8043 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8045 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8046 gcc_assert (*vectype != NULL_TREE);
8048 else if (*dt == vect_uninitialized_def
8049 || *dt == vect_constant_def
8050 || *dt == vect_external_def)
8051 *vectype = NULL_TREE;
8052 else
8053 gcc_unreachable ();
8055 return true;
8059 /* Function supportable_widening_operation
8061 Check whether an operation represented by the code CODE is a
8062 widening operation that is supported by the target platform in
8063 vector form (i.e., when operating on arguments of type VECTYPE_IN
8064 producing a result of type VECTYPE_OUT).
8066 Widening operations we currently support are NOP (CONVERT), FLOAT,
8067 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
8068 by the target platform either directly (via vector tree-codes), or via
8069 target builtins.
8071 Output:
8072 - CODE1 and CODE2 are codes of vector operations to be used when
8073 vectorizing the operation, if available.
8074 - MULTI_STEP_CVT determines the number of required intermediate steps in
8075 case of multi-step conversion (like char->short->int - in that case
8076 MULTI_STEP_CVT will be 1).
8077 - INTERM_TYPES contains the intermediate type required to perform the
8078 widening operation (short in the above example). */
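/* As an illustration (hypothetical types, following the char->short->int
   example above): widening a vector of chars (e.g. V16QI) to a vector of
   ints (e.g. V4SI) cannot be done in one step, so on success CODE1/CODE2
   name the low/high (or even/odd) unpack operations, *MULTI_STEP_CVT is 1,
   and INTERM_TYPES records the single intermediate (short) type used for
   the char->short step.  */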
8080 bool
8081 supportable_widening_operation (enum tree_code code, gimple stmt,
8082 tree vectype_out, tree vectype_in,
8083 enum tree_code *code1, enum tree_code *code2,
8084 int *multi_step_cvt,
8085 vec<tree> *interm_types)
8087 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8088 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8089 struct loop *vect_loop = NULL;
8090 machine_mode vec_mode;
8091 enum insn_code icode1, icode2;
8092 optab optab1, optab2;
8093 tree vectype = vectype_in;
8094 tree wide_vectype = vectype_out;
8095 enum tree_code c1, c2;
8096 int i;
8097 tree prev_type, intermediate_type;
8098 machine_mode intermediate_mode, prev_mode;
8099 optab optab3, optab4;
8101 *multi_step_cvt = 0;
8102 if (loop_info)
8103 vect_loop = LOOP_VINFO_LOOP (loop_info);
8105 switch (code)
8107 case WIDEN_MULT_EXPR:
8108 /* The result of a vectorized widening operation usually requires
8109 two vectors (because the widened results do not fit into one vector).
8110 The vector results would normally be expected to be
8111 generated in the same order as in the original scalar computation,
8112 i.e. if 8 results are generated in each vector iteration, they are
8113 to be organized as follows:
8114 vect1: [res1,res2,res3,res4],
8115 vect2: [res5,res6,res7,res8].
8117 However, in the special case that the result of the widening
8118 operation is used in a reduction computation only, the order doesn't
8119 matter (because when vectorizing a reduction we change the order of
8120 the computation). Some targets can take advantage of this and
8121 generate more efficient code. For example, targets like Altivec,
8122 that support widen_mult using a sequence of {mult_even,mult_odd}
8123 generate the following vectors:
8124 vect1: [res1,res3,res5,res7],
8125 vect2: [res2,res4,res6,res8].
8127 When vectorizing outer-loops, we execute the inner-loop sequentially
8128 (each vectorized inner-loop iteration contributes to VF outer-loop
8129 iterations in parallel). We therefore don't allow changing the
8130 order of the computation in the inner-loop during outer-loop
8131 vectorization. */
8132 /* TODO: Another case in which order doesn't *really* matter is when we
8133 widen and then contract again, e.g. (short)((int)x * y >> 8).
8134 Normally, pack_trunc performs an even/odd permute, whereas the
8135 repack from an even/odd expansion would be an interleave, which
8136 would be significantly simpler for e.g. AVX2. */
8137 /* In any case, in order to avoid duplicating the code below, recurse
8138 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8139 are properly set up for the caller. If we fail, we'll continue with
8140 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8141 if (vect_loop
8142 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8143 && !nested_in_vect_loop_p (vect_loop, stmt)
8144 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8145 stmt, vectype_out, vectype_in,
8146 code1, code2, multi_step_cvt,
8147 interm_types))
8149 /* Elements in a vector with the vect_used_by_reduction property cannot
8150 be reordered if the use chain with this property does not have the
8151 same operation. One such example is s += a * b, where elements
8152 in a and b cannot be reordered. Here we check if the vector defined
8153 by STMT is only directly used in the reduction statement. */
8154 tree lhs = gimple_assign_lhs (stmt);
8155 use_operand_p dummy;
8156 gimple use_stmt;
8157 stmt_vec_info use_stmt_info = NULL;
8158 if (single_imm_use (lhs, &dummy, &use_stmt)
8159 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8160 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8161 return true;
8163 c1 = VEC_WIDEN_MULT_LO_EXPR;
8164 c2 = VEC_WIDEN_MULT_HI_EXPR;
8165 break;
8167 case VEC_WIDEN_MULT_EVEN_EXPR:
8168 /* Support the recursion induced just above. */
8169 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8170 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8171 break;
8173 case WIDEN_LSHIFT_EXPR:
8174 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8175 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8176 break;
8178 CASE_CONVERT:
8179 c1 = VEC_UNPACK_LO_EXPR;
8180 c2 = VEC_UNPACK_HI_EXPR;
8181 break;
8183 case FLOAT_EXPR:
8184 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8185 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8186 break;
8188 case FIX_TRUNC_EXPR:
8189 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8190 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8191 computing the operation. */
8192 return false;
8194 default:
8195 gcc_unreachable ();
8198 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8200 enum tree_code ctmp = c1;
8201 c1 = c2;
8202 c2 = ctmp;
8205 if (code == FIX_TRUNC_EXPR)
8207 /* The signedness is determined from the output operand. */
8208 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8209 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8211 else
8213 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8214 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8217 if (!optab1 || !optab2)
8218 return false;
8220 vec_mode = TYPE_MODE (vectype);
8221 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8222 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8223 return false;
8225 *code1 = c1;
8226 *code2 = c2;
8228 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8229 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8230 return true;
8232 /* Check if it's a multi-step conversion that can be done using intermediate
8233 types. */
8235 prev_type = vectype;
8236 prev_mode = vec_mode;
8238 if (!CONVERT_EXPR_CODE_P (code))
8239 return false;
8241 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8242 intermediate steps in the promotion sequence. We try
8243 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8244 not. */
8245 interm_types->create (MAX_INTERM_CVT_STEPS);
8246 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8248 intermediate_mode = insn_data[icode1].operand[0].mode;
8249 intermediate_type
8250 = lang_hooks.types.type_for_mode (intermediate_mode,
8251 TYPE_UNSIGNED (prev_type));
8252 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8253 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8255 if (!optab3 || !optab4
8256 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8257 || insn_data[icode1].operand[0].mode != intermediate_mode
8258 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8259 || insn_data[icode2].operand[0].mode != intermediate_mode
8260 || ((icode1 = optab_handler (optab3, intermediate_mode))
8261 == CODE_FOR_nothing)
8262 || ((icode2 = optab_handler (optab4, intermediate_mode))
8263 == CODE_FOR_nothing))
8264 break;
8266 interm_types->quick_push (intermediate_type);
8267 (*multi_step_cvt)++;
8269 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8270 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8271 return true;
8273 prev_type = intermediate_type;
8274 prev_mode = intermediate_mode;
8277 interm_types->release ();
8278 return false;
8282 /* Function supportable_narrowing_operation
8284 Check whether an operation represented by the code CODE is a
8285 narrowing operation that is supported by the target platform in
8286 vector form (i.e., when operating on arguments of type VECTYPE_IN
8287 and producing a result of type VECTYPE_OUT).
8289 Narrowing operations we currently support are NOP (CONVERT) and
8290 FIX_TRUNC. This function checks if these operations are supported by
8291 the target platform directly via vector tree-codes.
8293 Output:
8294 - CODE1 is the code of a vector operation to be used when
8295 vectorizing the operation, if available.
8296 - MULTI_STEP_CVT determines the number of required intermediate steps in
8297 case of multi-step conversion (like int->short->char - in that case
8298 MULTI_STEP_CVT will be 1).
8299 - INTERM_TYPES contains the intermediate type required to perform the
8300 narrowing operation (short in the above example). */
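/* As an illustration (hypothetical types, following the int->short->char
   example above): narrowing a vector of ints (e.g. V4SI) to a vector of
   chars (e.g. V16QI) needs two pack steps, so on success CODE1 is
   VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT is 1, and INTERM_TYPES records the
   single intermediate (short) type used for the int->short step.  */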
8302 bool
8303 supportable_narrowing_operation (enum tree_code code,
8304 tree vectype_out, tree vectype_in,
8305 enum tree_code *code1, int *multi_step_cvt,
8306 vec<tree> *interm_types)
8308 machine_mode vec_mode;
8309 enum insn_code icode1;
8310 optab optab1, interm_optab;
8311 tree vectype = vectype_in;
8312 tree narrow_vectype = vectype_out;
8313 enum tree_code c1;
8314 tree intermediate_type;
8315 machine_mode intermediate_mode, prev_mode;
8316 int i;
8317 bool uns;
8319 *multi_step_cvt = 0;
8320 switch (code)
8322 CASE_CONVERT:
8323 c1 = VEC_PACK_TRUNC_EXPR;
8324 break;
8326 case FIX_TRUNC_EXPR:
8327 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8328 break;
8330 case FLOAT_EXPR:
8331 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8332 tree code and optabs used for computing the operation. */
8333 return false;
8335 default:
8336 gcc_unreachable ();
8339 if (code == FIX_TRUNC_EXPR)
8340 /* The signedness is determined from the output operand. */
8341 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8342 else
8343 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8345 if (!optab1)
8346 return false;
8348 vec_mode = TYPE_MODE (vectype);
8349 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8350 return false;
8352 *code1 = c1;
8354 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8355 return true;
8357 /* Check if it's a multi-step conversion that can be done using intermediate
8358 types. */
8359 prev_mode = vec_mode;
8360 if (code == FIX_TRUNC_EXPR)
8361 uns = TYPE_UNSIGNED (vectype_out);
8362 else
8363 uns = TYPE_UNSIGNED (vectype);
8365 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8366 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8367 costly than signed. */
8368 if (code == FIX_TRUNC_EXPR && uns)
8370 enum insn_code icode2;
8372 intermediate_type
8373 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8374 interm_optab
8375 = optab_for_tree_code (c1, intermediate_type, optab_default);
8376 if (interm_optab != unknown_optab
8377 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8378 && insn_data[icode1].operand[0].mode
8379 == insn_data[icode2].operand[0].mode)
8381 uns = false;
8382 optab1 = interm_optab;
8383 icode1 = icode2;
8387 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8388 intermediate steps in the demotion sequence. We try
8389 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8390 interm_types->create (MAX_INTERM_CVT_STEPS);
8391 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8393 intermediate_mode = insn_data[icode1].operand[0].mode;
8394 intermediate_type
8395 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8396 interm_optab
8397 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8398 optab_default);
8399 if (!interm_optab
8400 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8401 || insn_data[icode1].operand[0].mode != intermediate_mode
8402 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8403 == CODE_FOR_nothing))
8404 break;
8406 interm_types->quick_push (intermediate_type);
8407 (*multi_step_cvt)++;
8409 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8410 return true;
8412 prev_mode = intermediate_mode;
8413 optab1 = interm_optab;
8416 interm_types->release ();
8417 return false;