gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-ssa-alias.h"
33 #include "internal-fn.h"
34 #include "tree-eh.h"
35 #include "gimple-expr.h"
36 #include "is-a.h"
37 #include "gimple.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "gimple-ssa.h"
42 #include "tree-cfg.h"
43 #include "tree-phinodes.h"
44 #include "ssa-iterators.h"
45 #include "stringpool.h"
46 #include "tree-ssanames.h"
47 #include "tree-ssa-loop-manip.h"
48 #include "cfgloop.h"
49 #include "tree-ssa-loop.h"
50 #include "tree-scalar-evolution.h"
51 #include "expr.h"
52 #include "recog.h" /* FIXME: for insn_data */
53 #include "optabs.h"
54 #include "diagnostic-core.h"
55 #include "tree-vectorizer.h"
56 #include "dumpfile.h"
57 #include "cgraph.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (struct _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
75 gimple stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 struct loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
97 if (body_cost_vec)
99 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
100 add_stmt_info_to_vec (body_cost_vec, count, kind,
101 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
102 misalign);
103 return (unsigned)
104 (builtin_vectorization_cost (kind, vectype, misalign) * count);
107 else
109 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
110 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
111 void *target_cost_data;
113 if (loop_vinfo)
114 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
115 else
116 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
118 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
119 misalign, where);
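/* For example, vect_model_simple_cost below defers the body cost of a
   simple vector op by passing its cost vector:

     inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                     stmt_info, 0, vect_body);

   whereas passing a NULL cost vector sends the cost straight to the
   target's cost data via add_stmt_cost.  */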
123 /* Return a variable of type ELEM_TYPE[NELEMS]. */
125 static tree
126 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
128 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
129 "vect_array");
132 /* ARRAY is an array of vectors created by create_vector_array.
133 Return an SSA_NAME for the vector in index N. The reference
134 is part of the vectorization of STMT and the vector is associated
135 with scalar destination SCALAR_DEST. */
137 static tree
138 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
139 tree array, unsigned HOST_WIDE_INT n)
141 tree vect_type, vect, vect_name, array_ref;
142 gimple new_stmt;
144 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
145 vect_type = TREE_TYPE (TREE_TYPE (array));
146 vect = vect_create_destination_var (scalar_dest, vect_type);
147 array_ref = build4 (ARRAY_REF, vect_type, array,
148 build_int_cst (size_type_node, n),
149 NULL_TREE, NULL_TREE);
151 new_stmt = gimple_build_assign (vect, array_ref);
152 vect_name = make_ssa_name (vect, new_stmt);
153 gimple_assign_set_lhs (new_stmt, vect_name);
154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
156 return vect_name;
159 /* ARRAY is an array of vectors created by create_vector_array.
160 Emit code to store SSA_NAME VECT in index N of the array.
161 The store is part of the vectorization of STMT. */
163 static void
164 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
165 tree array, unsigned HOST_WIDE_INT n)
167 tree array_ref;
168 gimple new_stmt;
170 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (array_ref, vect);
175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
178 /* PTR is a pointer to an array of type TYPE. Return a representation
179 of *PTR. The memory reference replaces those in FIRST_DR
180 (and its group). */
182 static tree
183 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
185 tree mem_ref, alias_ptr_type;
187 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
188 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
189 /* Arrays have the same alignment as their type. */
190 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
191 return mem_ref;
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
200 static void
201 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
202 enum vect_relevant relevant, bool live_p,
203 bool used_in_pattern)
205 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208 gimple pattern_stmt;
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE, vect_location,
212 "mark relevant %d, live %d.\n", relevant, live_p);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
 216    may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 bool found = false;
221 if (!used_in_pattern)
223 imm_use_iterator imm_iter;
224 use_operand_p use_p;
225 gimple use_stmt;
226 tree lhs;
227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
228 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
230 if (is_gimple_assign (stmt))
231 lhs = gimple_assign_lhs (stmt);
232 else
233 lhs = gimple_call_lhs (stmt);
 235      /* This is a use outside the pattern; if LHS has other uses that are
236 pattern uses, we should mark the stmt itself, and not the pattern
237 stmt. */
238 if (lhs && TREE_CODE (lhs) == SSA_NAME)
239 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
241 if (is_gimple_debug (USE_STMT (use_p)))
242 continue;
243 use_stmt = USE_STMT (use_p);
245 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
246 continue;
248 if (vinfo_for_stmt (use_stmt)
249 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
251 found = true;
252 break;
257 if (!found)
259 /* This is the last stmt in a sequence that was detected as a
260 pattern that can potentially be vectorized. Don't mark the stmt
261 as relevant/live because it's not going to be vectorized.
262 Instead mark the pattern-stmt that replaces it. */
264 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_NOTE, vect_location,
268 "last stmt in pattern. don't mark"
269 " relevant/live.\n");
270 stmt_info = vinfo_for_stmt (pattern_stmt);
271 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
272 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
273 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
274 stmt = pattern_stmt;
278 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
279 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
280 STMT_VINFO_RELEVANT (stmt_info) = relevant;
282 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
283 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
285 if (dump_enabled_p ())
286 dump_printf_loc (MSG_NOTE, vect_location,
287 "already marked relevant/live.\n");
288 return;
291 worklist->safe_push (stmt);
295 /* Function vect_stmt_relevant_p.
297 Return true if STMT in loop that is represented by LOOP_VINFO is
298 "relevant for vectorization".
300 A stmt is considered "relevant for vectorization" if:
301 - it has uses outside the loop.
302 - it has vdefs (it alters memory).
 303    - it is a control stmt in the loop (other than the loop exit condition).
305 CHECKME: what other side effects would the vectorizer allow? */
307 static bool
308 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
309 enum vect_relevant *relevant, bool *live_p)
311 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
312 ssa_op_iter op_iter;
313 imm_use_iterator imm_iter;
314 use_operand_p use_p;
315 def_operand_p def_p;
317 *relevant = vect_unused_in_scope;
318 *live_p = false;
320 /* cond stmt other than loop exit cond. */
321 if (is_ctrl_stmt (stmt)
322 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
323 != loop_exit_ctrl_vec_info_type)
324 *relevant = vect_used_in_scope;
326 /* changing memory. */
327 if (gimple_code (stmt) != GIMPLE_PHI)
328 if (gimple_vdef (stmt))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: stmt has vdefs.\n");
333 *relevant = vect_used_in_scope;
336 /* uses outside the loop. */
337 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
339 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
341 basic_block bb = gimple_bb (USE_STMT (use_p));
342 if (!flow_bb_inside_loop_p (loop, bb))
344 if (dump_enabled_p ())
345 dump_printf_loc (MSG_NOTE, vect_location,
346 "vec_stmt_relevant_p: used out of loop.\n");
348 if (is_gimple_debug (USE_STMT (use_p)))
349 continue;
351 /* We expect all such uses to be in the loop exit phis
352 (because of loop closed form) */
353 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
354 gcc_assert (bb == single_exit (loop)->dest);
356 *live_p = true;
361 return (*live_p || *relevant);
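/* For instance, in

     for (i = 0; i < n; i++)
       sum += a[i];
     use (sum);

   the summation stmt has a use outside the loop (through the loop-closed
   exit phi), so *live_p is set above; a store such as a[i] = x is instead
   marked relevant because it has a vdef.  */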
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT. Check if USE is
368 used in STMT for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
373 tree operand;
374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
376 /* USE corresponds to some operand in STMT. If there is no data
377 reference in STMT, then any operand that corresponds to USE
378 is not indexing an array. */
379 if (!STMT_VINFO_DATA_REF (stmt_info))
380 return true;
 382   /* STMT has a data_ref. FORNOW this means that it is of one of
383 the following forms:
384 -1- ARRAY_REF = var
385 -2- var = ARRAY_REF
386 (This should have been verified in analyze_data_refs).
388 'var' in the second case corresponds to a def, not a use,
389 so USE cannot correspond to any operands that are not used
390 for array indexing.
392 Therefore, all we need to check is if STMT falls into the
393 first case, and whether var corresponds to USE. */
395 if (!gimple_assign_copy_p (stmt))
397 if (is_gimple_call (stmt)
398 && gimple_call_internal_p (stmt))
399 switch (gimple_call_internal_fn (stmt))
401 case IFN_MASK_STORE:
402 operand = gimple_call_arg (stmt, 3);
403 if (operand == use)
404 return true;
405 /* FALLTHRU */
406 case IFN_MASK_LOAD:
407 operand = gimple_call_arg (stmt, 2);
408 if (operand == use)
409 return true;
410 break;
411 default:
412 break;
414 return false;
417 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
418 return false;
419 operand = gimple_assign_rhs1 (stmt);
420 if (TREE_CODE (operand) != SSA_NAME)
421 return false;
423 if (operand == use)
424 return true;
426 return false;
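/* For example, for the copy-form store A[i] = x, only the use of 'x'
   (the rhs of the copy) makes this function return true; a use of 'i'
   serves address computation only and yields false, so process_use can
   leave the stmt defining 'i' alone.  */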
 431 /* Function process_use.
433 Inputs:
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 be performed.
441 Outputs:
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
446 Exceptions:
447 - case 1: If USE is used only for address computations (e.g. array indexing),
448 which does not need to be directly vectorized, then the liveness/relevance
449 of the respective DEF_STMT is left unchanged.
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 451    skip DEF_STMT because it had already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
455 Return true if everything is as expected. Return false otherwise. */
457 static bool
458 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
459 enum vect_relevant relevant, vec<gimple> *worklist,
460 bool force)
462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
464 stmt_vec_info dstmt_vinfo;
465 basic_block bb, def_bb;
466 tree def;
467 gimple def_stmt;
468 enum vect_def_type dt;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
475 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
531 switch (relevant)
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
548 case vect_used_in_scope:
549 break;
551 default:
552 gcc_unreachable ();
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
569 switch (relevant)
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
577 case vect_used_by_reduction:
578 relevant = vect_used_in_outer_by_reduction;
579 break;
581 case vect_used_in_scope:
582 relevant = vect_used_in_outer;
583 break;
585 default:
586 gcc_unreachable ();
590 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
591 is_pattern_stmt_p (stmt_vinfo));
592 return true;
596 /* Function vect_mark_stmts_to_be_vectorized.
598 Not all stmts in the loop need to be vectorized. For example:
600 for i...
601 for j...
602 1. T0 = i + j
603 2. T1 = a[T0]
605 3. j = j + 1
 607    Stmts 1 and 3 do not need to be vectorized, because loop control and
608 addressing of vectorized data-refs are handled differently.
610 This pass detects such stmts. */
612 bool
613 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
615 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
616 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
617 unsigned int nbbs = loop->num_nodes;
618 gimple_stmt_iterator si;
619 gimple stmt;
620 unsigned int i;
621 stmt_vec_info stmt_vinfo;
622 basic_block bb;
623 gimple phi;
624 bool live_p;
625 enum vect_relevant relevant, tmp_relevant;
626 enum vect_def_type def_type;
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE, vect_location,
630 "=== vect_mark_stmts_to_be_vectorized ===\n");
632 auto_vec<gimple, 64> worklist;
634 /* 1. Init worklist. */
635 for (i = 0; i < nbbs; i++)
637 bb = bbs[i];
638 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
640 phi = gsi_stmt (si);
641 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
644 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
645 dump_printf (MSG_NOTE, "\n");
648 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
649 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
651 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
653 stmt = gsi_stmt (si);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
657 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
658 dump_printf (MSG_NOTE, "\n");
661 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
662 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
666 /* 2. Process_worklist */
667 while (worklist.length () > 0)
669 use_operand_p use_p;
670 ssa_op_iter iter;
672 stmt = worklist.pop ();
673 if (dump_enabled_p ())
675 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
676 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
677 dump_printf (MSG_NOTE, "\n");
680 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
681 (DEF_STMT) as relevant/irrelevant and live/dead according to the
682 liveness and relevance properties of STMT. */
683 stmt_vinfo = vinfo_for_stmt (stmt);
684 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
685 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
687 /* Generally, the liveness and relevance properties of STMT are
688 propagated as is to the DEF_STMTs of its USEs:
689 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
690 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
692 One exception is when STMT has been identified as defining a reduction
693 variable; in this case we set the liveness/relevance as follows:
694 live_p = false
695 relevant = vect_used_by_reduction
696 This is because we distinguish between two kinds of relevant stmts -
697 those that are used by a reduction computation, and those that are
698 (also) used by a regular computation. This allows us later on to
699 identify stmts that are used solely by a reduction, and therefore the
700 order of the results that they produce does not have to be kept. */
702 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
703 tmp_relevant = relevant;
704 switch (def_type)
706 case vect_reduction_def:
707 switch (tmp_relevant)
709 case vect_unused_in_scope:
710 relevant = vect_used_by_reduction;
711 break;
713 case vect_used_by_reduction:
714 if (gimple_code (stmt) == GIMPLE_PHI)
715 break;
716 /* fall through */
718 default:
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of reduction.\n");
722 return false;
725 live_p = false;
726 break;
728 case vect_nested_cycle:
729 if (tmp_relevant != vect_unused_in_scope
730 && tmp_relevant != vect_used_in_outer_by_reduction
731 && tmp_relevant != vect_used_in_outer)
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735 "unsupported use of nested cycle.\n");
737 return false;
740 live_p = false;
741 break;
743 case vect_double_reduction_def:
744 if (tmp_relevant != vect_unused_in_scope
745 && tmp_relevant != vect_used_by_reduction)
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
749 "unsupported use of double reduction.\n");
751 return false;
754 live_p = false;
755 break;
757 default:
758 break;
761 if (is_pattern_stmt_p (stmt_vinfo))
763 /* Pattern statements are not inserted into the code, so
764 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
765 have to scan the RHS or function arguments instead. */
766 if (is_gimple_assign (stmt))
768 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
769 tree op = gimple_assign_rhs1 (stmt);
771 i = 1;
772 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
774 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
775 live_p, relevant, &worklist, false)
776 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
777 live_p, relevant, &worklist, false))
778 return false;
779 i = 2;
781 for (; i < gimple_num_ops (stmt); i++)
783 op = gimple_op (stmt, i);
784 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
785 &worklist, false))
786 return false;
789 else if (is_gimple_call (stmt))
791 for (i = 0; i < gimple_call_num_args (stmt); i++)
793 tree arg = gimple_call_arg (stmt, i);
794 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
795 &worklist, false))
796 return false;
800 else
801 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
803 tree op = USE_FROM_PTR (use_p);
804 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
805 &worklist, false))
806 return false;
809 if (STMT_VINFO_GATHER_P (stmt_vinfo))
811 tree off;
812 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
813 gcc_assert (decl);
814 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
815 &worklist, true))
816 return false;
818 } /* while worklist */
820 return true;
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
830 void
831 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
832 enum vect_def_type *dt,
833 stmt_vector_for_cost *prologue_cost_vec,
834 stmt_vector_for_cost *body_cost_vec)
836 int i;
837 int inside_cost = 0, prologue_cost = 0;
839 /* The SLP costs were already calculated during SLP tree build. */
840 if (PURE_SLP_STMT (stmt_info))
841 return;
843 /* FORNOW: Assuming maximum 2 args per stmts. */
844 for (i = 0; i < 2; i++)
845 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
846 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
847 stmt_info, 0, vect_prologue);
849 /* Pass the inside-of-loop statements to the target-specific cost model. */
850 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
851 stmt_info, 0, vect_body);
853 if (dump_enabled_p ())
854 dump_printf_loc (MSG_NOTE, vect_location,
855 "vect_model_simple_cost: inside_cost = %d, "
856 "prologue_cost = %d .\n", inside_cost, prologue_cost);
860 /* Model cost for type demotion and promotion operations. PWR is normally
861 zero for single-step promotions and demotions. It will be one if
862 two-step promotion/demotion is required, and so on. Each additional
863 step doubles the number of instructions required. */
865 static void
866 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
867 enum vect_def_type *dt, int pwr)
869 int i, tmp;
870 int inside_cost = 0, prologue_cost = 0;
871 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
872 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
873 void *target_cost_data;
875 /* The SLP costs were already calculated during SLP tree build. */
876 if (PURE_SLP_STMT (stmt_info))
877 return;
879 if (loop_vinfo)
880 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
881 else
882 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
884 for (i = 0; i < pwr + 1; i++)
886 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
887 (i + 1) : i;
888 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
889 vec_promote_demote, stmt_info, 0,
890 vect_body);
893 /* FORNOW: Assuming maximum 2 args per stmts. */
894 for (i = 0; i < 2; i++)
895 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
896 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
897 stmt_info, 0, vect_prologue);
899 if (dump_enabled_p ())
900 dump_printf_loc (MSG_NOTE, vect_location,
901 "vect_model_promotion_demotion_cost: inside_cost = %d, "
902 "prologue_cost = %d .\n", inside_cost, prologue_cost);
905 /* Function vect_cost_group_size
907 For grouped load or store, return the group_size only if it is the first
908 load or store of a group, else return 1. This ensures that group size is
909 only returned once per group. */
911 static int
912 vect_cost_group_size (stmt_vec_info stmt_info)
914 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
916 if (first_stmt == STMT_VINFO_STMT (stmt_info))
917 return GROUP_SIZE (stmt_info);
919 return 1;
923 /* Function vect_model_store_cost
925 Models cost for stores. In the case of grouped accesses, one access
926 has the overhead of the grouped access attributed to it. */
928 void
929 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
930 bool store_lanes_p, enum vect_def_type dt,
931 slp_tree slp_node,
932 stmt_vector_for_cost *prologue_cost_vec,
933 stmt_vector_for_cost *body_cost_vec)
935 int group_size;
936 unsigned int inside_cost = 0, prologue_cost = 0;
937 struct data_reference *first_dr;
938 gimple first_stmt;
940 /* The SLP costs were already calculated during SLP tree build. */
941 if (PURE_SLP_STMT (stmt_info))
942 return;
944 if (dt == vect_constant_def || dt == vect_external_def)
945 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
946 stmt_info, 0, vect_prologue);
948 /* Grouped access? */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
951 if (slp_node)
953 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
954 group_size = 1;
956 else
958 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
959 group_size = vect_cost_group_size (stmt_info);
962 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
964 /* Not a grouped access. */
965 else
967 group_size = 1;
968 first_dr = STMT_VINFO_DATA_REF (stmt_info);
971 /* We assume that the cost of a single store-lanes instruction is
972 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
973 access is instead being provided by a permute-and-store operation,
974 include the cost of the permutes. */
975 if (!store_lanes_p && group_size > 1)
977 /* Uses a high and low interleave operation for each needed permute. */
979 int nstmts = ncopies * exact_log2 (group_size) * group_size;
980 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
981 stmt_info, 0, vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: strided group_size = %d .\n",
986 group_size);
989 /* Costs of the stores. */
990 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_NOTE, vect_location,
994 "vect_model_store_cost: inside_cost = %d, "
995 "prologue_cost = %d .\n", inside_cost, prologue_cost);
999 /* Calculate cost of DR's memory access. */
1000 void
1001 vect_get_store_cost (struct data_reference *dr, int ncopies,
1002 unsigned int *inside_cost,
1003 stmt_vector_for_cost *body_cost_vec)
1005 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1006 gimple stmt = DR_STMT (dr);
1007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1009 switch (alignment_support_scheme)
1011 case dr_aligned:
1013 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1014 vector_store, stmt_info, 0,
1015 vect_body);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: aligned.\n");
1020 break;
1023 case dr_unaligned_supported:
1025 /* Here, we assign an additional cost for the unaligned store. */
1026 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1027 unaligned_store, stmt_info,
1028 DR_MISALIGNMENT (dr), vect_body);
1029 if (dump_enabled_p ())
1030 dump_printf_loc (MSG_NOTE, vect_location,
1031 "vect_model_store_cost: unaligned supported by "
1032 "hardware.\n");
1033 break;
1036 case dr_unaligned_unsupported:
1038 *inside_cost = VECT_MAX_COST;
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1042 "vect_model_store_cost: unsupported access.\n");
1043 break;
1046 default:
1047 gcc_unreachable ();
1052 /* Function vect_model_load_cost
1054 Models cost for loads. In the case of grouped accesses, the last access
1055 has the overhead of the grouped access attributed to it. Since unaligned
1056 accesses are supported for loads, we also account for the costs of the
1057 access scheme chosen. */
1059 void
1060 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1061 bool load_lanes_p, slp_tree slp_node,
1062 stmt_vector_for_cost *prologue_cost_vec,
1063 stmt_vector_for_cost *body_cost_vec)
1065 int group_size;
1066 gimple first_stmt;
1067 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1068 unsigned int inside_cost = 0, prologue_cost = 0;
1070 /* The SLP costs were already calculated during SLP tree build. */
1071 if (PURE_SLP_STMT (stmt_info))
1072 return;
1074 /* Grouped accesses? */
1075 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1076 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1078 group_size = vect_cost_group_size (stmt_info);
1079 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1081 /* Not a grouped access. */
1082 else
1084 group_size = 1;
1085 first_dr = dr;
1088 /* We assume that the cost of a single load-lanes instruction is
1089 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1090 access is instead being provided by a load-and-permute operation,
1091 include the cost of the permutes. */
1092 if (!load_lanes_p && group_size > 1)
1094      /* Uses even and odd extract operations for each needed permute. */
1095 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1096 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1097 stmt_info, 0, vect_body);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE, vect_location,
1101 "vect_model_load_cost: strided group_size = %d .\n",
1102 group_size);
1105 /* The loads themselves. */
1106 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1108 /* N scalar loads plus gathering them into a vector. */
1109 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1110 inside_cost += record_stmt_cost (body_cost_vec,
1111 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1112 scalar_load, stmt_info, 0, vect_body);
1113 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1114 stmt_info, 0, vect_body);
1116 else
1117 vect_get_load_cost (first_dr, ncopies,
1118 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1119 || group_size > 1 || slp_node),
1120 &inside_cost, &prologue_cost,
1121 prologue_cost_vec, body_cost_vec, true);
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE, vect_location,
1125 "vect_model_load_cost: inside_cost = %d, "
1126 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1130 /* Calculate cost of DR's memory access. */
1131 void
1132 vect_get_load_cost (struct data_reference *dr, int ncopies,
1133 bool add_realign_cost, unsigned int *inside_cost,
1134 unsigned int *prologue_cost,
1135 stmt_vector_for_cost *prologue_cost_vec,
1136 stmt_vector_for_cost *body_cost_vec,
1137 bool record_prologue_costs)
1139 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1140 gimple stmt = DR_STMT (dr);
1141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1143 switch (alignment_support_scheme)
1145 case dr_aligned:
1147 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1148 stmt_info, 0, vect_body);
1150 if (dump_enabled_p ())
1151 dump_printf_loc (MSG_NOTE, vect_location,
1152 "vect_model_load_cost: aligned.\n");
1154 break;
1156 case dr_unaligned_supported:
1158 /* Here, we assign an additional cost for the unaligned load. */
1159 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1160 unaligned_load, stmt_info,
1161 DR_MISALIGNMENT (dr), vect_body);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: unaligned supported by "
1166 "hardware.\n");
1168 break;
1170 case dr_explicit_realign:
1172 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1173 vector_load, stmt_info, 0, vect_body);
1174 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1175 vec_perm, stmt_info, 0, vect_body);
1177 /* FIXME: If the misalignment remains fixed across the iterations of
1178 the containing loop, the following cost should be added to the
1179 prologue costs. */
1180 if (targetm.vectorize.builtin_mask_for_load)
1181 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1182 stmt_info, 0, vect_body);
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_NOTE, vect_location,
1186 "vect_model_load_cost: explicit realign\n");
1188 break;
1190 case dr_explicit_realign_optimized:
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE, vect_location,
1194 "vect_model_load_cost: unaligned software "
1195 "pipelined.\n");
1197 /* Unaligned software pipeline has a load of an address, an initial
1198 load, and possibly a mask operation to "prime" the loop. However,
1199 if this is an access in a group of loads, which provide grouped
1200 access, then the above cost should only be considered for one
1201 access in the group. Inside the loop, there is a load op
1202 and a realignment op. */
1204 if (add_realign_cost && record_prologue_costs)
1206 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1207 vector_stmt, stmt_info,
1208 0, vect_prologue);
1209 if (targetm.vectorize.builtin_mask_for_load)
1210 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1211 vector_stmt, stmt_info,
1212 0, vect_prologue);
1215 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1216 stmt_info, 0, vect_body);
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1218 stmt_info, 0, vect_body);
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE, vect_location,
1222 "vect_model_load_cost: explicit realign optimized"
1223 "\n");
1225 break;
1228 case dr_unaligned_unsupported:
1230 *inside_cost = VECT_MAX_COST;
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1234 "vect_model_load_cost: unsupported access.\n");
1235 break;
1238 default:
1239 gcc_unreachable ();
1243 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1244 the loop preheader for the vectorized stmt STMT. */
1246 static void
1247 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1249 if (gsi)
1250 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1251 else
1253 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1254 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1256 if (loop_vinfo)
1258 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1259 basic_block new_bb;
1260 edge pe;
1262 if (nested_in_vect_loop_p (loop, stmt))
1263 loop = loop->inner;
1265 pe = loop_preheader_edge (loop);
1266 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1267 gcc_assert (!new_bb);
1269 else
1271 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1272 basic_block bb;
1273 gimple_stmt_iterator gsi_bb_start;
1275 gcc_assert (bb_vinfo);
1276 bb = BB_VINFO_BB (bb_vinfo);
1277 gsi_bb_start = gsi_after_labels (bb);
1278 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1282 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "created new init_stmt: ");
1286 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1287 dump_printf (MSG_NOTE, "\n");
1291 /* Function vect_init_vector.
1293 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1294 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1295    a vector type, a vector with all elements equal to VAL is created first.
1296    Place the initialization at GSI if it is not NULL. Otherwise, place the
1297 initialization at the loop preheader.
1298 Return the DEF of INIT_STMT.
1299 It will be used in the vectorization of STMT. */
1301 tree
1302 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1304 tree new_var;
1305 gimple init_stmt;
1306 tree vec_oprnd;
1307 tree new_temp;
1309 if (TREE_CODE (type) == VECTOR_TYPE
1310 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1312 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1314 if (CONSTANT_CLASS_P (val))
1315 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1316 else
1318 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1319 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1320 new_temp, val,
1321 NULL_TREE);
1322 vect_init_vector_1 (stmt, init_stmt, gsi);
1323 val = new_temp;
1326 val = build_vector_from_val (type, val);
1329 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1330 init_stmt = gimple_build_assign (new_var, val);
1331 new_temp = make_ssa_name (new_var, init_stmt);
1332 gimple_assign_set_lhs (init_stmt, new_temp);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 vec_oprnd = gimple_assign_lhs (init_stmt);
1335 return vec_oprnd;
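/* For example, vect_get_vec_def_for_operand below uses this to turn a
   scalar constant operand such as 3 into the vector {3, 3, 3, 3}
   (assuming four elements per vector); since no GSI is passed there, the
   initialization is inserted in the loop preheader.  */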
1339 /* Function vect_get_vec_def_for_operand.
1341 OP is an operand in STMT. This function returns a (vector) def that will be
1342 used in the vectorized stmt for STMT.
1344 In the case that OP is an SSA_NAME which is defined in the loop, then
1345 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1347 In case OP is an invariant or constant, a new stmt that creates a vector def
1348 needs to be introduced. */
1350 tree
1351 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1353 tree vec_oprnd;
1354 gimple vec_stmt;
1355 gimple def_stmt;
1356 stmt_vec_info def_stmt_info = NULL;
1357 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1358 unsigned int nunits;
1359 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1360 tree def;
1361 enum vect_def_type dt;
1362 bool is_simple_use;
1363 tree vector_type;
1365 if (dump_enabled_p ())
1367 dump_printf_loc (MSG_NOTE, vect_location,
1368 "vect_get_vec_def_for_operand: ");
1369 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1370 dump_printf (MSG_NOTE, "\n");
1373 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1374 &def_stmt, &def, &dt);
1375 gcc_assert (is_simple_use);
1376 if (dump_enabled_p ())
1378 int loc_printed = 0;
1379 if (def)
1381 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1382 loc_printed = 1;
1383 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1384 dump_printf (MSG_NOTE, "\n");
1386 if (def_stmt)
1388 if (loc_printed)
1389 dump_printf (MSG_NOTE, " def_stmt = ");
1390 else
1391 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1392 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1393 dump_printf (MSG_NOTE, "\n");
1397 switch (dt)
1399 /* Case 1: operand is a constant. */
1400 case vect_constant_def:
1402 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1403 gcc_assert (vector_type);
1404 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1406 if (scalar_def)
1407 *scalar_def = op;
1409 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1410 if (dump_enabled_p ())
1411 dump_printf_loc (MSG_NOTE, vect_location,
1412 "Create vector_cst. nunits = %d\n", nunits);
1414 return vect_init_vector (stmt, op, vector_type, NULL);
1417 /* Case 2: operand is defined outside the loop - loop invariant. */
1418 case vect_external_def:
1420 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1421 gcc_assert (vector_type);
1423 if (scalar_def)
1424 *scalar_def = def;
1426 /* Create 'vec_inv = {inv,inv,..,inv}' */
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1430 return vect_init_vector (stmt, def, vector_type, NULL);
1433 /* Case 3: operand is defined inside the loop. */
1434 case vect_internal_def:
1436 if (scalar_def)
1437 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1439 /* Get the def from the vectorized stmt. */
1440 def_stmt_info = vinfo_for_stmt (def_stmt);
1442 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1443 /* Get vectorized pattern statement. */
1444 if (!vec_stmt
1445 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1446 && !STMT_VINFO_RELEVANT (def_stmt_info))
1447 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1448 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1449 gcc_assert (vec_stmt);
1450 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1451 vec_oprnd = PHI_RESULT (vec_stmt);
1452 else if (is_gimple_call (vec_stmt))
1453 vec_oprnd = gimple_call_lhs (vec_stmt);
1454 else
1455 vec_oprnd = gimple_assign_lhs (vec_stmt);
1456 return vec_oprnd;
1459 /* Case 4: operand is defined by a loop header phi - reduction */
1460 case vect_reduction_def:
1461 case vect_double_reduction_def:
1462 case vect_nested_cycle:
1464 struct loop *loop;
1466 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1467 loop = (gimple_bb (def_stmt))->loop_father;
1469 /* Get the def before the loop */
1470 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1471 return get_initial_def_for_reduction (stmt, op, scalar_def);
1474 /* Case 5: operand is defined by loop-header phi - induction. */
1475 case vect_induction_def:
1477 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1479 /* Get the def from the vectorized stmt. */
1480 def_stmt_info = vinfo_for_stmt (def_stmt);
1481 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1482 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1483 vec_oprnd = PHI_RESULT (vec_stmt);
1484 else
1485 vec_oprnd = gimple_get_lhs (vec_stmt);
1486 return vec_oprnd;
1489 default:
1490 gcc_unreachable ();
1495 /* Function vect_get_vec_def_for_stmt_copy
1497 Return a vector-def for an operand. This function is used when the
1498 vectorized stmt to be created (by the caller to this function) is a "copy"
1499 created in case the vectorized result cannot fit in one vector, and several
1500 copies of the vector-stmt are required. In this case the vector-def is
1501 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1502 of the stmt that defines VEC_OPRND.
1503 DT is the type of the vector def VEC_OPRND.
1505 Context:
1506 In case the vectorization factor (VF) is bigger than the number
1507 of elements that can fit in a vectype (nunits), we have to generate
1508 more than one vector stmt to vectorize the scalar stmt. This situation
1509 arises when there are multiple data-types operated upon in the loop; the
1510 smallest data-type determines the VF, and as a result, when vectorizing
1511 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1512 vector stmt (each computing a vector of 'nunits' results, and together
1513 computing 'VF' results in each iteration). This function is called when
1514 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1515 which VF=16 and nunits=4, so the number of copies required is 4):
1517 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1519 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1520 VS1.1: vx.1 = memref1 VS1.2
1521 VS1.2: vx.2 = memref2 VS1.3
1522 VS1.3: vx.3 = memref3
1524 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1525 VSnew.1: vz1 = vx.1 + ... VSnew.2
1526 VSnew.2: vz2 = vx.2 + ... VSnew.3
1527 VSnew.3: vz3 = vx.3 + ...
1529 The vectorization of S1 is explained in vectorizable_load.
1530 The vectorization of S2:
1531 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1532 the function 'vect_get_vec_def_for_operand' is called to
1533 get the relevant vector-def for each operand of S2. For operand x it
1534 returns the vector-def 'vx.0'.
1536 To create the remaining copies of the vector-stmt (VSnew.j), this
1537 function is called to get the relevant vector-def for each operand. It is
1538 obtained from the respective VS1.j stmt, which is recorded in the
1539 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1541 For example, to obtain the vector-def 'vx.1' in order to create the
1542 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1543 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1544 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1545 and return its def ('vx.1').
1546 Overall, to create the above sequence this function will be called 3 times:
1547 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1548 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1549 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1551 tree
1552 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1554 gimple vec_stmt_for_operand;
1555 stmt_vec_info def_stmt_info;
1557 /* Do nothing; can reuse same def. */
1558 if (dt == vect_external_def || dt == vect_constant_def )
1559 return vec_oprnd;
1561 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1562 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1563 gcc_assert (def_stmt_info);
1564 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1565 gcc_assert (vec_stmt_for_operand);
1566 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1567 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1568 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1569 else
1570 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1571 return vec_oprnd;
1575 /* Get vectorized definitions for the operands to create a copy of an original
1576 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1578 static void
1579 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1580 vec<tree> *vec_oprnds0,
1581 vec<tree> *vec_oprnds1)
1583 tree vec_oprnd = vec_oprnds0->pop ();
1585 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1586 vec_oprnds0->quick_push (vec_oprnd);
1588 if (vec_oprnds1 && vec_oprnds1->length ())
1590 vec_oprnd = vec_oprnds1->pop ();
1591 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1592 vec_oprnds1->quick_push (vec_oprnd);
1597 /* Get vectorized definitions for OP0 and OP1.
1598 REDUC_INDEX is the index of reduction operand in case of reduction,
1599 and -1 otherwise. */
1601 void
1602 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1603 vec<tree> *vec_oprnds0,
1604 vec<tree> *vec_oprnds1,
1605 slp_tree slp_node, int reduc_index)
1607 if (slp_node)
1609 int nops = (op1 == NULL_TREE) ? 1 : 2;
1610 auto_vec<tree> ops (nops);
1611 auto_vec<vec<tree> > vec_defs (nops);
1613 ops.quick_push (op0);
1614 if (op1)
1615 ops.quick_push (op1);
1617 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1619 *vec_oprnds0 = vec_defs[0];
1620 if (op1)
1621 *vec_oprnds1 = vec_defs[1];
1623 else
1625 tree vec_oprnd;
1627 vec_oprnds0->create (1);
1628 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1629 vec_oprnds0->quick_push (vec_oprnd);
1631 if (op1)
1633 vec_oprnds1->create (1);
1634 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1635 vec_oprnds1->quick_push (vec_oprnd);
1641 /* Function vect_finish_stmt_generation.
1643 Insert a new stmt. */
1645 void
1646 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1647 gimple_stmt_iterator *gsi)
1649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1653 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1655 if (!gsi_end_p (*gsi)
1656 && gimple_has_mem_ops (vec_stmt))
1658 gimple at_stmt = gsi_stmt (*gsi);
1659 tree vuse = gimple_vuse (at_stmt);
1660 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1662 tree vdef = gimple_vdef (at_stmt);
1663 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 /* If we have an SSA vuse and insert a store, update virtual
1665 SSA form to avoid triggering the renamer. Do so only
1666 if we can easily see all uses - which is what almost always
1667 happens with the way vectorized stmts are inserted. */
1668 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 && ((is_gimple_assign (vec_stmt)
1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 || (is_gimple_call (vec_stmt)
1672 && !(gimple_call_flags (vec_stmt)
1673 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1675 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 gimple_set_vdef (vec_stmt, new_vdef);
1677 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1681 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1683 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1684 bb_vinfo));
1686 if (dump_enabled_p ())
1688 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1690 dump_printf (MSG_NOTE, "\n");
1693 gimple_set_location (vec_stmt, gimple_location (stmt));
1695 /* While EH edges will generally prevent vectorization, stmt might
1696 e.g. be in a must-not-throw region. Ensure newly created stmts
1697 that could throw are part of the same region. */
1698 int lp_nr = lookup_stmt_eh_lp (stmt);
1699 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1700 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1703 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1704 a function declaration if the target has a vectorized version
1705 of the function, or NULL_TREE if the function cannot be vectorized. */
1707 tree
1708 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1710 tree fndecl = gimple_call_fndecl (call);
1712 /* We only handle functions that do not read or clobber memory -- i.e.
1713 const or novops ones. */
1714 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1715 return NULL_TREE;
1717 if (!fndecl
1718 || TREE_CODE (fndecl) != FUNCTION_DECL
1719 || !DECL_BUILT_IN (fndecl))
1720 return NULL_TREE;
1722 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1723 vectype_in);
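/* For example, for a call to the const builtin sqrtf with V4SF input and
   output vector types, the target hook above may return the decl of a
   vectorized sqrt builtin if the target provides one, and NULL_TREE
   otherwise.  */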
1727 static tree permute_vec_elements (tree, tree, tree, gimple,
1728 gimple_stmt_iterator *);
1731 /* Function vectorizable_mask_load_store.
1733 Check if STMT performs a conditional load or store that can be vectorized.
1734 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1735 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1736 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
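/* Such IFN_MASK_LOAD / IFN_MASK_STORE calls are produced by if-conversion
   for conditional accesses, e.g.

     for (i = 0; i < n; i++)
       if (c[i])
         a[i] = b[i];

   where the load of b[i] and the store to a[i] each become a masked
   internal-function call guarded by the vectorized condition.  */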
1738 static bool
1739 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1740 gimple *vec_stmt, slp_tree slp_node)
1742 tree vec_dest = NULL;
1743 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1744 stmt_vec_info prev_stmt_info;
1745 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1746 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1747 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1748 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1749 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1750 tree elem_type;
1751 gimple new_stmt;
1752 tree dummy;
1753 tree dataref_ptr = NULL_TREE;
1754 gimple ptr_incr;
1755 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1756 int ncopies;
1757 int i, j;
1758 bool inv_p;
1759 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1760 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1761 int gather_scale = 1;
1762 enum vect_def_type gather_dt = vect_unknown_def_type;
1763 bool is_store;
1764 tree mask;
1765 gimple def_stmt;
1766 tree def;
1767 enum vect_def_type dt;
1769 if (slp_node != NULL)
1770 return false;
1772 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1773 gcc_assert (ncopies >= 1);
1775 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1776 mask = gimple_call_arg (stmt, 2);
1777 if (TYPE_PRECISION (TREE_TYPE (mask))
1778 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1779 return false;
1781 /* FORNOW. This restriction should be relaxed. */
1782 if (nested_in_vect_loop && ncopies > 1)
1784 if (dump_enabled_p ())
1785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1786 "multiple types in nested loop.");
1787 return false;
1790 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1791 return false;
1793 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1794 return false;
1796 if (!STMT_VINFO_DATA_REF (stmt_info))
1797 return false;
1799 elem_type = TREE_TYPE (vectype);
1801 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1802 return false;
1804 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1805 return false;
1807 if (STMT_VINFO_GATHER_P (stmt_info))
1809 gimple def_stmt;
1810 tree def;
1811 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1812 &gather_off, &gather_scale);
1813 gcc_assert (gather_decl);
1814 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1815 &def_stmt, &def, &gather_dt,
1816 &gather_off_vectype))
1818 if (dump_enabled_p ())
1819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1820 "gather index use not simple.");
1821 return false;
1824 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1825 tree masktype
1826 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1827 if (TREE_CODE (masktype) == INTEGER_TYPE)
1829 if (dump_enabled_p ())
1830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1831 "masked gather with integer mask not supported.");
1832 return false;
1835 else if (tree_int_cst_compare (nested_in_vect_loop
1836 ? STMT_VINFO_DR_STEP (stmt_info)
1837 : DR_STEP (dr), size_zero_node) <= 0)
1838 return false;
1839 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1840 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1841 return false;
1843 if (TREE_CODE (mask) != SSA_NAME)
1844 return false;
1846 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1847 &def_stmt, &def, &dt))
1848 return false;
1850 if (is_store)
1852 tree rhs = gimple_call_arg (stmt, 3);
1853 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1854 &def_stmt, &def, &dt))
1855 return false;
1858 if (!vec_stmt) /* transformation not required. */
1860 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1861 if (is_store)
1862 vect_model_store_cost (stmt_info, ncopies, false, dt,
1863 NULL, NULL, NULL);
1864 else
1865 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1866 return true;
1869 /** Transform. **/
1871 if (STMT_VINFO_GATHER_P (stmt_info))
1873 tree vec_oprnd0 = NULL_TREE, op;
1874 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1875 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1876 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1877 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1878 tree mask_perm_mask = NULL_TREE;
1879 edge pe = loop_preheader_edge (loop);
1880 gimple_seq seq;
1881 basic_block new_bb;
1882 enum { NARROW, NONE, WIDEN } modifier;
1883 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1885 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1886 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1887 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1888 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1889 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1890 scaletype = TREE_VALUE (arglist);
1891 gcc_checking_assert (types_compatible_p (srctype, rettype)
1892 && types_compatible_p (srctype, masktype));
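/* The gather offset vector need not have the same number of elements
   as the data vector.  Prepare permutation masks up front so that the
   offset and mask operands can be split, or the gather results
   combined, across the copies generated below.  */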
1894 if (nunits == gather_off_nunits)
1895 modifier = NONE;
1896 else if (nunits == gather_off_nunits / 2)
1898 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1899 modifier = WIDEN;
1901 for (i = 0; i < gather_off_nunits; ++i)
1902 sel[i] = i | nunits;
1904 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1905 gcc_assert (perm_mask != NULL_TREE);
1907 else if (nunits == gather_off_nunits * 2)
1909 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1910 modifier = NARROW;
1912 for (i = 0; i < nunits; ++i)
1913 sel[i] = i < gather_off_nunits
1914 ? i : i + nunits - gather_off_nunits;
1916 perm_mask = vect_gen_perm_mask (vectype, sel);
1917 gcc_assert (perm_mask != NULL_TREE);
1918 ncopies *= 2;
1919 for (i = 0; i < nunits; ++i)
1920 sel[i] = i | gather_off_nunits;
1921 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1922 gcc_assert (mask_perm_mask != NULL_TREE);
1924 else
1925 gcc_unreachable ();
1927 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1929 ptr = fold_convert (ptrtype, gather_base);
1930 if (!is_gimple_min_invariant (ptr))
1932 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1933 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1934 gcc_assert (!new_bb);
1937 scale = build_int_cst (scaletype, gather_scale);
1939 prev_stmt_info = NULL;
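/* Generate the NCOPIES masked gather calls.  For each copy fetch the
   vectorized offset and mask (view-converting them to the builtin's
   index and mask types when needed), emit the call, and view-convert
   the result back to VECTYPE if the builtin returns a different
   vector type.  */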
1940 for (j = 0; j < ncopies; ++j)
1942 if (modifier == WIDEN && (j & 1))
1943 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1944 perm_mask, stmt, gsi);
1945 else if (j == 0)
1946 op = vec_oprnd0
1947 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1948 else
1949 op = vec_oprnd0
1950 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1952 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1954 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1955 == TYPE_VECTOR_SUBPARTS (idxtype));
1956 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1957 var = make_ssa_name (var, NULL);
1958 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1959 new_stmt
1960 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1961 op, NULL_TREE);
1962 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1963 op = var;
1966 if (mask_perm_mask && (j & 1))
1967 mask_op = permute_vec_elements (mask_op, mask_op,
1968 mask_perm_mask, stmt, gsi);
1969 else
1971 if (j == 0)
1972 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1973 else
1975 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1976 &def_stmt, &def, &dt);
1977 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1980 mask_op = vec_mask;
1981 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1983 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1984 == TYPE_VECTOR_SUBPARTS (masktype));
1985 var = vect_get_new_vect_var (masktype, vect_simple_var,
1986 NULL);
1987 var = make_ssa_name (var, NULL);
1988 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1989 new_stmt
1990 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1991 mask_op, NULL_TREE);
1992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1993 mask_op = var;
1997 new_stmt
1998 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1999 scale);
2001 if (!useless_type_conversion_p (vectype, rettype))
2003 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2004 == TYPE_VECTOR_SUBPARTS (rettype));
2005 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2006 op = make_ssa_name (var, new_stmt);
2007 gimple_call_set_lhs (new_stmt, op);
2008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2009 var = make_ssa_name (vec_dest, NULL);
2010 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2011 new_stmt
2012 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2013 NULL_TREE);
2015 else
2017 var = make_ssa_name (vec_dest, new_stmt);
2018 gimple_call_set_lhs (new_stmt, var);
2021 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2023 if (modifier == NARROW)
2025 if ((j & 1) == 0)
2027 prev_res = var;
2028 continue;
2030 var = permute_vec_elements (prev_res, var,
2031 perm_mask, stmt, gsi);
2032 new_stmt = SSA_NAME_DEF_STMT (var);
2035 if (prev_stmt_info == NULL)
2036 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2037 else
2038 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2039 prev_stmt_info = vinfo_for_stmt (new_stmt);
2041 return true;
2043 else if (is_store)
2045 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
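/* Masked store: for each copy fetch the vectorized rhs and mask
   (initial defs for the first copy, stmt copies afterwards), create or
   advance the data-ref pointer, record its alignment, and emit an
   IFN_MASK_STORE call.  */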
2046 prev_stmt_info = NULL;
2047 for (i = 0; i < ncopies; i++)
2049 unsigned align, misalign;
2051 if (i == 0)
2053 tree rhs = gimple_call_arg (stmt, 3);
2054 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2055 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2056 /* We should have caught mismatched types earlier. */
2057 gcc_assert (useless_type_conversion_p (vectype,
2058 TREE_TYPE (vec_rhs)));
2059 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2060 NULL_TREE, &dummy, gsi,
2061 &ptr_incr, false, &inv_p);
2062 gcc_assert (!inv_p);
2064 else
2066 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2067 &def, &dt);
2068 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2069 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2070 &def, &dt);
2071 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2072 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2073 TYPE_SIZE_UNIT (vectype));
2076 align = TYPE_ALIGN_UNIT (vectype);
2077 if (aligned_access_p (dr))
2078 misalign = 0;
2079 else if (DR_MISALIGNMENT (dr) == -1)
2081 align = TYPE_ALIGN_UNIT (elem_type);
2082 misalign = 0;
2084 else
2085 misalign = DR_MISALIGNMENT (dr);
2086 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2087 misalign);
2088 new_stmt
2089 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2090 gimple_call_arg (stmt, 1),
2091 vec_mask, vec_rhs);
2092 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2093 if (i == 0)
2094 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2095 else
2096 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2097 prev_stmt_info = vinfo_for_stmt (new_stmt);
2100 else
2102 tree vec_mask = NULL_TREE;
2103 prev_stmt_info = NULL;
2104 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
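/* Masked load: for each copy fetch the vectorized mask, create or
   advance the data-ref pointer, record its alignment, and emit an
   IFN_MASK_LOAD call whose lhs is a fresh vector SSA name.  */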
2105 for (i = 0; i < ncopies; i++)
2107 unsigned align, misalign;
2109 if (i == 0)
2111 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2112 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2113 NULL_TREE, &dummy, gsi,
2114 &ptr_incr, false, &inv_p);
2115 gcc_assert (!inv_p);
2117 else
2119 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2120 &def, &dt);
2121 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2122 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2123 TYPE_SIZE_UNIT (vectype));
2126 align = TYPE_ALIGN_UNIT (vectype);
2127 if (aligned_access_p (dr))
2128 misalign = 0;
2129 else if (DR_MISALIGNMENT (dr) == -1)
2131 align = TYPE_ALIGN_UNIT (elem_type);
2132 misalign = 0;
2134 else
2135 misalign = DR_MISALIGNMENT (dr);
2136 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2137 misalign);
2138 new_stmt
2139 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2140 gimple_call_arg (stmt, 1),
2141 vec_mask);
2142 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2144 if (i == 0)
2145 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2146 else
2147 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2148 prev_stmt_info = vinfo_for_stmt (new_stmt);
2152 return true;
2156 /* Function vectorizable_call.
2158 Check if STMT performs a function call that can be vectorized.
2159 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2160 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2161 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2163 static bool
2164 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2165 slp_tree slp_node)
2167 tree vec_dest;
2168 tree scalar_dest;
2169 tree op, type;
2170 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2172 tree vectype_out, vectype_in;
2173 int nunits_in;
2174 int nunits_out;
2175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2176 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2177 tree fndecl, new_temp, def, rhs_type;
2178 gimple def_stmt;
2179 enum vect_def_type dt[3]
2180 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2181 gimple new_stmt = NULL;
2182 int ncopies, j;
2183 vec<tree> vargs = vNULL;
2184 enum { NARROW, NONE, WIDEN } modifier;
2185 size_t i, nargs;
2186 tree lhs;
2188 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2189 return false;
2191 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2192 return false;
2194 /* Is STMT a vectorizable call? */
2195 if (!is_gimple_call (stmt))
2196 return false;
2198 if (gimple_call_internal_p (stmt)
2199 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2200 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2201 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2202 slp_node);
2204 if (gimple_call_lhs (stmt) == NULL_TREE
2205 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2206 return false;
2208 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2210 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2212 /* Process function arguments. */
2213 rhs_type = NULL_TREE;
2214 vectype_in = NULL_TREE;
2215 nargs = gimple_call_num_args (stmt);
2217 /* Bail out if the function has more than three arguments; we do not have
2218 interesting builtin functions to vectorize with more than two arguments
2219 except for fma. Calls with no arguments are not handled either. */
2220 if (nargs == 0 || nargs > 3)
2221 return false;
2223 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2224 if (gimple_call_internal_p (stmt)
2225 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2227 nargs = 0;
2228 rhs_type = unsigned_type_node;
2231 for (i = 0; i < nargs; i++)
2233 tree opvectype;
2235 op = gimple_call_arg (stmt, i);
2237 /* We can only handle calls with arguments of the same type. */
2238 if (rhs_type
2239 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "argument types differ.\n");
2244 return false;
2246 if (!rhs_type)
2247 rhs_type = TREE_TYPE (op);
2249 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2250 &def_stmt, &def, &dt[i], &opvectype))
2252 if (dump_enabled_p ())
2253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 "use not simple.\n");
2255 return false;
2258 if (!vectype_in)
2259 vectype_in = opvectype;
2260 else if (opvectype
2261 && opvectype != vectype_in)
2263 if (dump_enabled_p ())
2264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2265 "argument vector types differ.\n");
2266 return false;
2269 /* If all arguments are external or constant defs, use a vector type with
2270 the same size as the output vector type. */
2271 if (!vectype_in)
2272 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2273 if (vec_stmt)
2274 gcc_assert (vectype_in);
2275 if (!vectype_in)
2277 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2280 "no vectype for scalar type ");
2281 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2282 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2285 return false;
2288 /* FORNOW */
2289 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2290 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2291 if (nunits_in == nunits_out / 2)
2292 modifier = NARROW;
2293 else if (nunits_out == nunits_in)
2294 modifier = NONE;
2295 else if (nunits_out == nunits_in / 2)
2296 modifier = WIDEN;
2297 else
2298 return false;
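/* NARROW means each output vector holds twice as many elements as an
   input vector, so every vectorized call below consumes two vector
   defs per argument.  WIDEN (an output with half the elements of an
   input) is not implemented for calls.  */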
2300 /* For now, we only vectorize functions if a target specific builtin
2301 is available. TODO -- in some cases, it might be profitable to
2302 insert the calls for pieces of the vector, in order to be able
2303 to vectorize other operations in the loop. */
2304 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2305 if (fndecl == NULL_TREE)
2307 if (gimple_call_internal_p (stmt)
2308 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2309 && !slp_node
2310 && loop_vinfo
2311 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2312 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2313 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2314 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2316 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2317 { 0, 1, 2, ... vf - 1 } vector. */
2318 gcc_assert (nargs == 0);
2320 else
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "function is not vectorizable.\n");
2325 return false;
2329 gcc_assert (!gimple_vuse (stmt));
2331 if (slp_node || PURE_SLP_STMT (stmt_info))
2332 ncopies = 1;
2333 else if (modifier == NARROW)
2334 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2335 else
2336 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2338 /* Sanity check: make sure that at least one copy of the vectorized stmt
2339 needs to be generated. */
2340 gcc_assert (ncopies >= 1);
2342 if (!vec_stmt) /* transformation not required. */
2344 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2347 "\n");
2348 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2349 return true;
2352 /** Transform. **/
2354 if (dump_enabled_p ())
2355 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2357 /* Handle def. */
2358 scalar_dest = gimple_call_lhs (stmt);
2359 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2361 prev_stmt_info = NULL;
2362 switch (modifier)
2364 case NONE:
2365 for (j = 0; j < ncopies; ++j)
2367 /* Build argument list for the vectorized call. */
2368 if (j == 0)
2369 vargs.create (nargs);
2370 else
2371 vargs.truncate (0);
2373 if (slp_node)
2375 auto_vec<vec<tree> > vec_defs (nargs);
2376 vec<tree> vec_oprnds0;
2378 for (i = 0; i < nargs; i++)
2379 vargs.quick_push (gimple_call_arg (stmt, i));
2380 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2381 vec_oprnds0 = vec_defs[0];
2383 /* Arguments are ready. Create the new vector stmt. */
2384 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2386 size_t k;
2387 for (k = 0; k < nargs; k++)
2389 vec<tree> vec_oprndsk = vec_defs[k];
2390 vargs[k] = vec_oprndsk[i];
2392 new_stmt = gimple_build_call_vec (fndecl, vargs);
2393 new_temp = make_ssa_name (vec_dest, new_stmt);
2394 gimple_call_set_lhs (new_stmt, new_temp);
2395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2396 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2399 for (i = 0; i < nargs; i++)
2401 vec<tree> vec_oprndsi = vec_defs[i];
2402 vec_oprndsi.release ();
2404 continue;
2407 for (i = 0; i < nargs; i++)
2409 op = gimple_call_arg (stmt, i);
2410 if (j == 0)
2411 vec_oprnd0
2412 = vect_get_vec_def_for_operand (op, stmt, NULL);
2413 else
2415 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2416 vec_oprnd0
2417 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2420 vargs.quick_push (vec_oprnd0);
2423 if (gimple_call_internal_p (stmt)
2424 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2426 tree *v = XALLOCAVEC (tree, nunits_out);
2427 int k;
2428 for (k = 0; k < nunits_out; ++k)
2429 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2430 tree cst = build_vector (vectype_out, v);
2431 tree new_var
2432 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2433 gimple init_stmt = gimple_build_assign (new_var, cst);
2434 new_temp = make_ssa_name (new_var, init_stmt);
2435 gimple_assign_set_lhs (init_stmt, new_temp);
2436 vect_init_vector_1 (stmt, init_stmt, NULL);
2437 new_temp = make_ssa_name (vec_dest, NULL);
2438 new_stmt = gimple_build_assign (new_temp,
2439 gimple_assign_lhs (init_stmt));
2441 else
2443 new_stmt = gimple_build_call_vec (fndecl, vargs);
2444 new_temp = make_ssa_name (vec_dest, new_stmt);
2445 gimple_call_set_lhs (new_stmt, new_temp);
2447 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2449 if (j == 0)
2450 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2451 else
2452 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2454 prev_stmt_info = vinfo_for_stmt (new_stmt);
2457 break;
2459 case NARROW:
2460 for (j = 0; j < ncopies; ++j)
2462 /* Build argument list for the vectorized call. */
2463 if (j == 0)
2464 vargs.create (nargs * 2);
2465 else
2466 vargs.truncate (0);
2468 if (slp_node)
2470 auto_vec<vec<tree> > vec_defs (nargs);
2471 vec<tree> vec_oprnds0;
2473 for (i = 0; i < nargs; i++)
2474 vargs.quick_push (gimple_call_arg (stmt, i));
2475 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2476 vec_oprnds0 = vec_defs[0];
2478 /* Arguments are ready. Create the new vector stmt. */
2479 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2481 size_t k;
2482 vargs.truncate (0);
2483 for (k = 0; k < nargs; k++)
2485 vec<tree> vec_oprndsk = vec_defs[k];
2486 vargs.quick_push (vec_oprndsk[i]);
2487 vargs.quick_push (vec_oprndsk[i + 1]);
2489 new_stmt = gimple_build_call_vec (fndecl, vargs);
2490 new_temp = make_ssa_name (vec_dest, new_stmt);
2491 gimple_call_set_lhs (new_stmt, new_temp);
2492 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2493 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2496 for (i = 0; i < nargs; i++)
2498 vec<tree> vec_oprndsi = vec_defs[i];
2499 vec_oprndsi.release ();
2501 continue;
2504 for (i = 0; i < nargs; i++)
2506 op = gimple_call_arg (stmt, i);
2507 if (j == 0)
2509 vec_oprnd0
2510 = vect_get_vec_def_for_operand (op, stmt, NULL);
2511 vec_oprnd1
2512 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2514 else
2516 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2517 vec_oprnd0
2518 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2519 vec_oprnd1
2520 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2523 vargs.quick_push (vec_oprnd0);
2524 vargs.quick_push (vec_oprnd1);
2527 new_stmt = gimple_build_call_vec (fndecl, vargs);
2528 new_temp = make_ssa_name (vec_dest, new_stmt);
2529 gimple_call_set_lhs (new_stmt, new_temp);
2530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2532 if (j == 0)
2533 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2534 else
2535 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2537 prev_stmt_info = vinfo_for_stmt (new_stmt);
2540 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2542 break;
2544 case WIDEN:
2545 /* No current target implements this case. */
2546 return false;
2549 vargs.release ();
2551 /* The call in STMT might prevent it from being removed in dce.
2552 We cannot remove it here, however, because the ssa name it defines
2553 is mapped to the new definition. So just replace the rhs of the
2554 statement with something harmless. */
2556 if (slp_node)
2557 return true;
2559 type = TREE_TYPE (scalar_dest);
2560 if (is_pattern_stmt_p (stmt_info))
2561 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2562 else
2563 lhs = gimple_call_lhs (stmt);
2564 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2565 set_vinfo_for_stmt (new_stmt, stmt_info);
2566 set_vinfo_for_stmt (stmt, NULL);
2567 STMT_VINFO_STMT (stmt_info) = new_stmt;
2568 gsi_replace (gsi, new_stmt, false);
2570 return true;
2574 struct simd_call_arg_info
2576 tree vectype;
2577 tree op;
2578 enum vect_def_type dt;
2579 HOST_WIDE_INT linear_step;
2580 unsigned int align;
2583 /* Function vectorizable_simd_clone_call.
2585 Check if STMT performs a function call that can be vectorized
2586 by calling a simd clone of the function.
2587 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2588 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2589 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2591 static bool
2592 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2593 gimple *vec_stmt, slp_tree slp_node)
2595 tree vec_dest;
2596 tree scalar_dest;
2597 tree op, type;
2598 tree vec_oprnd0 = NULL_TREE;
2599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2600 tree vectype;
2601 unsigned int nunits;
2602 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2603 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2604 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2605 tree fndecl, new_temp, def;
2606 gimple def_stmt;
2607 gimple new_stmt = NULL;
2608 int ncopies, j;
2609 vec<simd_call_arg_info> arginfo = vNULL;
2610 vec<tree> vargs = vNULL;
2611 size_t i, nargs;
2612 tree lhs, rtype, ratype;
2613 vec<constructor_elt, va_gc> *ret_ctor_elts;
2615 /* Is STMT a vectorizable call? */
2616 if (!is_gimple_call (stmt))
2617 return false;
2619 fndecl = gimple_call_fndecl (stmt);
2620 if (fndecl == NULL_TREE)
2621 return false;
2623 struct cgraph_node *node = cgraph_get_node (fndecl);
2624 if (node == NULL || node->simd_clones == NULL)
2625 return false;
2627 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2628 return false;
2630 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2631 return false;
2633 if (gimple_call_lhs (stmt)
2634 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2635 return false;
2637 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2639 vectype = STMT_VINFO_VECTYPE (stmt_info);
2641 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2642 return false;
2644 /* FORNOW */
2645 if (slp_node || PURE_SLP_STMT (stmt_info))
2646 return false;
2648 /* Process function arguments. */
2649 nargs = gimple_call_num_args (stmt);
2651 /* Bail out if the function has zero arguments. */
2652 if (nargs == 0)
2653 return false;
2655 arginfo.create (nargs);
2657 for (i = 0; i < nargs; i++)
2659 simd_call_arg_info thisarginfo;
2660 affine_iv iv;
2662 thisarginfo.linear_step = 0;
2663 thisarginfo.align = 0;
2664 thisarginfo.op = NULL_TREE;
2666 op = gimple_call_arg (stmt, i);
2667 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2668 &def_stmt, &def, &thisarginfo.dt,
2669 &thisarginfo.vectype)
2670 || thisarginfo.dt == vect_uninitialized_def)
2672 if (dump_enabled_p ())
2673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2674 "use not simple.\n");
2675 arginfo.release ();
2676 return false;
2679 if (thisarginfo.dt == vect_constant_def
2680 || thisarginfo.dt == vect_external_def)
2681 gcc_assert (thisarginfo.vectype == NULL_TREE);
2682 else
2683 gcc_assert (thisarginfo.vectype != NULL_TREE);
2685 if (thisarginfo.dt != vect_constant_def
2686 && thisarginfo.dt != vect_external_def
2687 && loop_vinfo
2688 && TREE_CODE (op) == SSA_NAME
2689 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2690 && tree_fits_shwi_p (iv.step))
2692 thisarginfo.linear_step = tree_to_shwi (iv.step);
2693 thisarginfo.op = iv.base;
2695 else if ((thisarginfo.dt == vect_constant_def
2696 || thisarginfo.dt == vect_external_def)
2697 && POINTER_TYPE_P (TREE_TYPE (op)))
2698 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2700 arginfo.quick_push (thisarginfo);
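/* Pick the best simd clone for this call: skip clones whose simdlen
   exceeds the vectorization factor, clones with a different number of
   arguments, inbranch clones (FORNOW), and clones whose argument kinds,
   linear steps or alignment requirements do not match the call; among
   the remaining ones prefer a larger simdlen and a smaller
   target-specific penalty.  */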
2703 unsigned int badness = 0;
2704 struct cgraph_node *bestn = NULL;
2705 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2706 bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2707 else
2708 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2709 n = n->simdclone->next_clone)
2711 unsigned int this_badness = 0;
2712 if (n->simdclone->simdlen
2713 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2714 || n->simdclone->nargs != nargs)
2715 continue;
2716 if (n->simdclone->simdlen
2717 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2718 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2719 - exact_log2 (n->simdclone->simdlen)) * 1024;
2720 if (n->simdclone->inbranch)
2721 this_badness += 2048;
2722 int target_badness = targetm.simd_clone.usable (n);
2723 if (target_badness < 0)
2724 continue;
2725 this_badness += target_badness * 512;
2726 /* FORNOW: Have to add code to add the mask argument. */
2727 if (n->simdclone->inbranch)
2728 continue;
2729 for (i = 0; i < nargs; i++)
2731 switch (n->simdclone->args[i].arg_type)
2733 case SIMD_CLONE_ARG_TYPE_VECTOR:
2734 if (!useless_type_conversion_p
2735 (n->simdclone->args[i].orig_type,
2736 TREE_TYPE (gimple_call_arg (stmt, i))))
2737 i = -1;
2738 else if (arginfo[i].dt == vect_constant_def
2739 || arginfo[i].dt == vect_external_def
2740 || arginfo[i].linear_step)
2741 this_badness += 64;
2742 break;
2743 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2744 if (arginfo[i].dt != vect_constant_def
2745 && arginfo[i].dt != vect_external_def)
2746 i = -1;
2747 break;
2748 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2749 if (arginfo[i].dt == vect_constant_def
2750 || arginfo[i].dt == vect_external_def
2751 || (arginfo[i].linear_step
2752 != n->simdclone->args[i].linear_step))
2753 i = -1;
2754 break;
2755 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2756 /* FORNOW */
2757 i = -1;
2758 break;
2759 case SIMD_CLONE_ARG_TYPE_MASK:
2760 gcc_unreachable ();
2762 if (i == (size_t) -1)
2763 break;
2764 if (n->simdclone->args[i].alignment > arginfo[i].align)
2766 i = -1;
2767 break;
2769 if (arginfo[i].align)
2770 this_badness += (exact_log2 (arginfo[i].align)
2771 - exact_log2 (n->simdclone->args[i].alignment));
2773 if (i == (size_t) -1)
2774 continue;
2775 if (bestn == NULL || this_badness < badness)
2777 bestn = n;
2778 badness = this_badness;
2782 if (bestn == NULL)
2784 arginfo.release ();
2785 return false;
2788 for (i = 0; i < nargs; i++)
2789 if ((arginfo[i].dt == vect_constant_def
2790 || arginfo[i].dt == vect_external_def)
2791 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2793 arginfo[i].vectype
2794 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2795 i)));
2796 if (arginfo[i].vectype == NULL
2797 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2798 > bestn->simdclone->simdlen))
2800 arginfo.release ();
2801 return false;
2805 fndecl = bestn->decl;
2806 nunits = bestn->simdclone->simdlen;
2807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2809 /* If the function isn't const, only allow it in simd loops where the user
2810 has asserted that at least nunits consecutive iterations can be
2811 performed using SIMD instructions. */
2812 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2813 && gimple_vuse (stmt))
2815 arginfo.release ();
2816 return false;
2819 /* Sanity check: make sure that at least one copy of the vectorized stmt
2820 needs to be generated. */
2821 gcc_assert (ncopies >= 1);
2823 if (!vec_stmt) /* transformation not required. */
2825 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2826 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2827 if (dump_enabled_p ())
2828 dump_printf_loc (MSG_NOTE, vect_location,
2829 "=== vectorizable_simd_clone_call ===\n");
2830 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2831 arginfo.release ();
2832 return true;
2835 /** Transform. **/
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2840 /* Handle def. */
2841 scalar_dest = gimple_call_lhs (stmt);
2842 vec_dest = NULL_TREE;
2843 rtype = NULL_TREE;
2844 ratype = NULL_TREE;
2845 if (scalar_dest)
2847 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2848 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2849 if (TREE_CODE (rtype) == ARRAY_TYPE)
2851 ratype = rtype;
2852 rtype = TREE_TYPE (ratype);
2856 prev_stmt_info = NULL;
2857 for (j = 0; j < ncopies; ++j)
2859 /* Build argument list for the vectorized call. */
2860 if (j == 0)
2861 vargs.create (nargs);
2862 else
2863 vargs.truncate (0);
2865 for (i = 0; i < nargs; i++)
2867 unsigned int k, l, m, o;
2868 tree atype;
2869 op = gimple_call_arg (stmt, i);
2870 switch (bestn->simdclone->args[i].arg_type)
2872 case SIMD_CLONE_ARG_TYPE_VECTOR:
2873 atype = bestn->simdclone->args[i].vector_type;
2874 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
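/* The clone may take its vector arguments in a different width than the
   loop's vector type: extract sub-vectors with BIT_FIELD_REF when ATYPE
   is narrower than the operand's vectype, or glue several vector defs
   together with a CONSTRUCTOR when ATYPE is wider.  */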
2875 for (m = j * o; m < (j + 1) * o; m++)
2877 if (TYPE_VECTOR_SUBPARTS (atype)
2878 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2880 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2881 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2882 / TYPE_VECTOR_SUBPARTS (atype));
2883 gcc_assert ((k & (k - 1)) == 0);
2884 if (m == 0)
2885 vec_oprnd0
2886 = vect_get_vec_def_for_operand (op, stmt, NULL);
2887 else
2889 vec_oprnd0 = arginfo[i].op;
2890 if ((m & (k - 1)) == 0)
2891 vec_oprnd0
2892 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2893 vec_oprnd0);
2895 arginfo[i].op = vec_oprnd0;
2896 vec_oprnd0
2897 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2898 size_int (prec),
2899 bitsize_int ((m & (k - 1)) * prec));
2900 new_stmt
2901 = gimple_build_assign (make_ssa_name (atype, NULL),
2902 vec_oprnd0);
2903 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2904 vargs.safe_push (gimple_assign_lhs (new_stmt));
2906 else
2908 k = (TYPE_VECTOR_SUBPARTS (atype)
2909 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2910 gcc_assert ((k & (k - 1)) == 0);
2911 vec<constructor_elt, va_gc> *ctor_elts;
2912 if (k != 1)
2913 vec_alloc (ctor_elts, k);
2914 else
2915 ctor_elts = NULL;
2916 for (l = 0; l < k; l++)
2918 if (m == 0 && l == 0)
2919 vec_oprnd0
2920 = vect_get_vec_def_for_operand (op, stmt, NULL);
2921 else
2922 vec_oprnd0
2923 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2924 arginfo[i].op);
2925 arginfo[i].op = vec_oprnd0;
2926 if (k == 1)
2927 break;
2928 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2929 vec_oprnd0);
2931 if (k == 1)
2932 vargs.safe_push (vec_oprnd0);
2933 else
2935 vec_oprnd0 = build_constructor (atype, ctor_elts);
2936 new_stmt
2937 = gimple_build_assign (make_ssa_name (atype, NULL),
2938 vec_oprnd0);
2939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2940 vargs.safe_push (gimple_assign_lhs (new_stmt));
2944 break;
2945 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2946 vargs.safe_push (op);
2947 break;
2948 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
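/* For a linear argument the first copy materializes the base value on
   the preheader edge and builds a PHI in the loop header that advances
   by LINEAR_STEP * NCOPIES * NUNITS per iteration; later copies in the
   same iteration just add J * NUNITS * LINEAR_STEP to that PHI
   result.  */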
2949 if (j == 0)
2951 gimple_seq stmts;
2952 arginfo[i].op
2953 = force_gimple_operand (arginfo[i].op, &stmts, true,
2954 NULL_TREE);
2955 if (stmts != NULL)
2957 basic_block new_bb;
2958 edge pe = loop_preheader_edge (loop);
2959 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2960 gcc_assert (!new_bb);
2962 tree phi_res = copy_ssa_name (op, NULL);
2963 gimple new_phi = create_phi_node (phi_res, loop->header);
2964 set_vinfo_for_stmt (new_phi,
2965 new_stmt_vec_info (new_phi, loop_vinfo,
2966 NULL));
2967 add_phi_arg (new_phi, arginfo[i].op,
2968 loop_preheader_edge (loop), UNKNOWN_LOCATION);
2969 enum tree_code code
2970 = POINTER_TYPE_P (TREE_TYPE (op))
2971 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2972 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2973 ? sizetype : TREE_TYPE (op);
2974 double_int cst
2975 = double_int::from_shwi
2976 (bestn->simdclone->args[i].linear_step);
2977 cst *= double_int::from_uhwi (ncopies * nunits);
2978 tree tcst = double_int_to_tree (type, cst);
2979 tree phi_arg = copy_ssa_name (op, NULL);
2980 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
2981 phi_res, tcst);
2982 gimple_stmt_iterator si = gsi_after_labels (loop->header);
2983 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
2984 set_vinfo_for_stmt (new_stmt,
2985 new_stmt_vec_info (new_stmt, loop_vinfo,
2986 NULL));
2987 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
2988 UNKNOWN_LOCATION);
2989 arginfo[i].op = phi_res;
2990 vargs.safe_push (phi_res);
2992 else
2994 enum tree_code code
2995 = POINTER_TYPE_P (TREE_TYPE (op))
2996 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2997 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2998 ? sizetype : TREE_TYPE (op);
2999 double_int cst
3000 = double_int::from_shwi
3001 (bestn->simdclone->args[i].linear_step);
3002 cst *= double_int::from_uhwi (j * nunits);
3003 tree tcst = double_int_to_tree (type, cst);
3004 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3005 new_stmt
3006 = gimple_build_assign_with_ops (code, new_temp,
3007 arginfo[i].op, tcst);
3008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3009 vargs.safe_push (new_temp);
3011 break;
3012 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3013 default:
3014 gcc_unreachable ();
3018 new_stmt = gimple_build_call_vec (fndecl, vargs);
3019 if (vec_dest)
3021 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3022 if (ratype)
3023 new_temp = create_tmp_var (ratype, NULL);
3024 else if (TYPE_VECTOR_SUBPARTS (vectype)
3025 == TYPE_VECTOR_SUBPARTS (rtype))
3026 new_temp = make_ssa_name (vec_dest, new_stmt);
3027 else
3028 new_temp = make_ssa_name (rtype, new_stmt);
3029 gimple_call_set_lhs (new_stmt, new_temp);
3031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
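/* If the clone returns an array or a vector of a different width than
   VECTYPE, split the result with MEM_REF/BIT_FIELD_REF or assemble
   several return values into a CONSTRUCTOR, and clobber any array
   temporary once its contents have been copied out.  */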
3033 if (vec_dest)
3035 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3037 unsigned int k, l;
3038 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3039 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3040 gcc_assert ((k & (k - 1)) == 0);
3041 for (l = 0; l < k; l++)
3043 tree t;
3044 if (ratype)
3046 t = build_fold_addr_expr (new_temp);
3047 t = build2 (MEM_REF, vectype, t,
3048 build_int_cst (TREE_TYPE (t),
3049 l * prec / BITS_PER_UNIT));
3051 else
3052 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3053 size_int (prec), bitsize_int (l * prec));
3054 new_stmt
3055 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3056 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3057 if (j == 0 && l == 0)
3058 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3059 else
3060 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3062 prev_stmt_info = vinfo_for_stmt (new_stmt);
3065 if (ratype)
3067 tree clobber = build_constructor (ratype, NULL);
3068 TREE_THIS_VOLATILE (clobber) = 1;
3069 new_stmt = gimple_build_assign (new_temp, clobber);
3070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3072 continue;
3074 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3076 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3077 / TYPE_VECTOR_SUBPARTS (rtype));
3078 gcc_assert ((k & (k - 1)) == 0);
3079 if ((j & (k - 1)) == 0)
3080 vec_alloc (ret_ctor_elts, k);
3081 if (ratype)
3083 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3084 for (m = 0; m < o; m++)
3086 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3087 size_int (m), NULL_TREE, NULL_TREE);
3088 new_stmt
3089 = gimple_build_assign (make_ssa_name (rtype, NULL),
3090 tem);
3091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3092 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3093 gimple_assign_lhs (new_stmt));
3095 tree clobber = build_constructor (ratype, NULL);
3096 TREE_THIS_VOLATILE (clobber) = 1;
3097 new_stmt = gimple_build_assign (new_temp, clobber);
3098 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3100 else
3101 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3102 if ((j & (k - 1)) != k - 1)
3103 continue;
3104 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3105 new_stmt
3106 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3107 vec_oprnd0);
3108 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3110 if ((unsigned) j == k - 1)
3111 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3112 else
3113 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3115 prev_stmt_info = vinfo_for_stmt (new_stmt);
3116 continue;
3118 else if (ratype)
3120 tree t = build_fold_addr_expr (new_temp);
3121 t = build2 (MEM_REF, vectype, t,
3122 build_int_cst (TREE_TYPE (t), 0));
3123 new_stmt
3124 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3125 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3126 tree clobber = build_constructor (ratype, NULL);
3127 TREE_THIS_VOLATILE (clobber) = 1;
3128 vect_finish_stmt_generation (stmt,
3129 gimple_build_assign (new_temp,
3130 clobber), gsi);
3134 if (j == 0)
3135 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3136 else
3137 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3139 prev_stmt_info = vinfo_for_stmt (new_stmt);
3142 vargs.release ();
3144 /* The call in STMT might prevent it from being removed in dce.
3145 We cannot remove it here, however, because the ssa name it defines
3146 is mapped to the new definition. So just replace the rhs of the
3147 statement with something harmless. */
3149 if (slp_node)
3150 return true;
3152 if (scalar_dest)
3154 type = TREE_TYPE (scalar_dest);
3155 if (is_pattern_stmt_p (stmt_info))
3156 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3157 else
3158 lhs = gimple_call_lhs (stmt);
3159 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3161 else
3162 new_stmt = gimple_build_nop ();
3163 set_vinfo_for_stmt (new_stmt, stmt_info);
3164 set_vinfo_for_stmt (stmt, NULL);
3165 STMT_VINFO_STMT (stmt_info) = new_stmt;
3166 gsi_replace (gsi, new_stmt, false);
3167 unlink_stmt_vdef (stmt);
3169 return true;
3173 /* Function vect_gen_widened_results_half
3175 Create a vector stmt whose code is CODE, whose number of operands is given
3176 by OP_TYPE, and whose result variable is VEC_DEST; its arguments are
3177 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3178 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3179 needs to be created (DECL is a function-decl of a target-builtin).
3180 STMT is the original scalar stmt that we are vectorizing. */
3182 static gimple
3183 vect_gen_widened_results_half (enum tree_code code,
3184 tree decl,
3185 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3186 tree vec_dest, gimple_stmt_iterator *gsi,
3187 gimple stmt)
3189 gimple new_stmt;
3190 tree new_temp;
3192 /* Generate half of the widened result: */
3193 if (code == CALL_EXPR)
3195 /* Target specific support */
3196 if (op_type == binary_op)
3197 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3198 else
3199 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3200 new_temp = make_ssa_name (vec_dest, new_stmt);
3201 gimple_call_set_lhs (new_stmt, new_temp);
3203 else
3205 /* Generic support */
3206 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3207 if (op_type != binary_op)
3208 vec_oprnd1 = NULL;
3209 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3210 vec_oprnd1);
3211 new_temp = make_ssa_name (vec_dest, new_stmt);
3212 gimple_assign_set_lhs (new_stmt, new_temp);
3214 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3216 return new_stmt;
3220 /* Get vectorized definitions for loop-based vectorization. For the first
3221 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3222 the scalar operand), and for the rest we get a copy with
3223 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3224 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3225 The vectors are collected into VEC_OPRNDS. */
3227 static void
3228 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3229 vec<tree> *vec_oprnds, int multi_step_cvt)
3231 tree vec_oprnd;
3233 /* Get first vector operand. */
3234 /* All the vector operands except the very first one (that is scalar oprnd)
3235 are stmt copies. */
3236 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3237 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3238 else
3239 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3241 vec_oprnds->quick_push (vec_oprnd);
3243 /* Get second vector operand. */
3244 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3245 vec_oprnds->quick_push (vec_oprnd);
3247 *oprnd = vec_oprnd;
3249 /* For conversion in multiple steps, continue to get operands
3250 recursively. */
3251 if (multi_step_cvt)
3252 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3256 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3257 For multi-step conversions store the resulting vectors and call the function
3258 recursively. */
3260 static void
3261 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3262 int multi_step_cvt, gimple stmt,
3263 vec<tree> vec_dsts,
3264 gimple_stmt_iterator *gsi,
3265 slp_tree slp_node, enum tree_code code,
3266 stmt_vec_info *prev_stmt_info)
3268 unsigned int i;
3269 tree vop0, vop1, new_tmp, vec_dest;
3270 gimple new_stmt;
3271 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3273 vec_dest = vec_dsts.pop ();
3275 for (i = 0; i < vec_oprnds->length (); i += 2)
3277 /* Create demotion operation. */
3278 vop0 = (*vec_oprnds)[i];
3279 vop1 = (*vec_oprnds)[i + 1];
3280 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3281 new_tmp = make_ssa_name (vec_dest, new_stmt);
3282 gimple_assign_set_lhs (new_stmt, new_tmp);
3283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3285 if (multi_step_cvt)
3286 /* Store the resulting vector for next recursive call. */
3287 (*vec_oprnds)[i/2] = new_tmp;
3288 else
3290 /* This is the last step of the conversion sequence. Store the
3291 vectors in SLP_NODE or in the vector info of the scalar statement
3292 (or in STMT_VINFO_RELATED_STMT chain). */
3293 if (slp_node)
3294 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3295 else
3297 if (!*prev_stmt_info)
3298 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3299 else
3300 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3302 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3307 /* For multi-step demotion operations we first generate demotion operations
3308 from the source type to the intermediate types, and then combine the
3309 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3310 type. */
3311 if (multi_step_cvt)
3313 /* At each level of recursion we have half of the operands we had at the
3314 previous level. */
3315 vec_oprnds->truncate ((i+1)/2);
3316 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3317 stmt, vec_dsts, gsi, slp_node,
3318 VEC_PACK_TRUNC_EXPR,
3319 prev_stmt_info);
3322 vec_dsts.quick_push (vec_dest);
3326 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3327 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3328 the resulting vectors and call the function recursively. */
3330 static void
3331 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3332 vec<tree> *vec_oprnds1,
3333 gimple stmt, tree vec_dest,
3334 gimple_stmt_iterator *gsi,
3335 enum tree_code code1,
3336 enum tree_code code2, tree decl1,
3337 tree decl2, int op_type)
3339 int i;
3340 tree vop0, vop1, new_tmp1, new_tmp2;
3341 gimple new_stmt1, new_stmt2;
3342 vec<tree> vec_tmp = vNULL;
3344 vec_tmp.create (vec_oprnds0->length () * 2);
3345 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3347 if (op_type == binary_op)
3348 vop1 = (*vec_oprnds1)[i];
3349 else
3350 vop1 = NULL_TREE;
3352 /* Generate the two halves of promotion operation. */
3353 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3354 op_type, vec_dest, gsi, stmt);
3355 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3356 op_type, vec_dest, gsi, stmt);
3357 if (is_gimple_call (new_stmt1))
3359 new_tmp1 = gimple_call_lhs (new_stmt1);
3360 new_tmp2 = gimple_call_lhs (new_stmt2);
3362 else
3364 new_tmp1 = gimple_assign_lhs (new_stmt1);
3365 new_tmp2 = gimple_assign_lhs (new_stmt2);
3368 /* Store the results for the next step. */
3369 vec_tmp.quick_push (new_tmp1);
3370 vec_tmp.quick_push (new_tmp2);
3373 vec_oprnds0->release ();
3374 *vec_oprnds0 = vec_tmp;
3378 /* Check if STMT performs a conversion operation, that can be vectorized.
3379 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3380 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3381 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3383 static bool
3384 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3385 gimple *vec_stmt, slp_tree slp_node)
3387 tree vec_dest;
3388 tree scalar_dest;
3389 tree op0, op1 = NULL_TREE;
3390 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3391 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3392 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3393 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3394 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3395 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3396 tree new_temp;
3397 tree def;
3398 gimple def_stmt;
3399 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3400 gimple new_stmt = NULL;
3401 stmt_vec_info prev_stmt_info;
3402 int nunits_in;
3403 int nunits_out;
3404 tree vectype_out, vectype_in;
3405 int ncopies, i, j;
3406 tree lhs_type, rhs_type;
3407 enum { NARROW, NONE, WIDEN } modifier;
3408 vec<tree> vec_oprnds0 = vNULL;
3409 vec<tree> vec_oprnds1 = vNULL;
3410 tree vop0;
3411 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3412 int multi_step_cvt = 0;
3413 vec<tree> vec_dsts = vNULL;
3414 vec<tree> interm_types = vNULL;
3415 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3416 int op_type;
3417 enum machine_mode rhs_mode;
3418 unsigned short fltsz;
3420 /* Is STMT a vectorizable conversion? */
3422 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3423 return false;
3425 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3426 return false;
3428 if (!is_gimple_assign (stmt))
3429 return false;
3431 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3432 return false;
3434 code = gimple_assign_rhs_code (stmt);
3435 if (!CONVERT_EXPR_CODE_P (code)
3436 && code != FIX_TRUNC_EXPR
3437 && code != FLOAT_EXPR
3438 && code != WIDEN_MULT_EXPR
3439 && code != WIDEN_LSHIFT_EXPR)
3440 return false;
3442 op_type = TREE_CODE_LENGTH (code);
3444 /* Check types of lhs and rhs. */
3445 scalar_dest = gimple_assign_lhs (stmt);
3446 lhs_type = TREE_TYPE (scalar_dest);
3447 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3449 op0 = gimple_assign_rhs1 (stmt);
3450 rhs_type = TREE_TYPE (op0);
3452 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3453 && !((INTEGRAL_TYPE_P (lhs_type)
3454 && INTEGRAL_TYPE_P (rhs_type))
3455 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3456 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3457 return false;
3459 if ((INTEGRAL_TYPE_P (lhs_type)
3460 && (TYPE_PRECISION (lhs_type)
3461 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3462 || (INTEGRAL_TYPE_P (rhs_type)
3463 && (TYPE_PRECISION (rhs_type)
3464 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3466 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3468 "type conversion to/from bit-precision unsupported."
3469 "\n");
3470 return false;
3473 /* Check the operands of the operation. */
3474 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3475 &def_stmt, &def, &dt[0], &vectype_in))
3477 if (dump_enabled_p ())
3478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3479 "use not simple.\n");
3480 return false;
3482 if (op_type == binary_op)
3484 bool ok;
3486 op1 = gimple_assign_rhs2 (stmt);
3487 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3488 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3489 OP1. */
3490 if (CONSTANT_CLASS_P (op0))
3491 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3492 &def_stmt, &def, &dt[1], &vectype_in);
3493 else
3494 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3495 &def, &dt[1]);
3497 if (!ok)
3499 if (dump_enabled_p ())
3500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3501 "use not simple.\n");
3502 return false;
3506 /* If op0 is an external or constant def, use a vector type of
3507 the same size as the output vector type. */
3508 if (!vectype_in)
3509 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3510 if (vec_stmt)
3511 gcc_assert (vectype_in);
3512 if (!vectype_in)
3514 if (dump_enabled_p ())
3516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3517 "no vectype for scalar type ");
3518 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3519 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3522 return false;
3525 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3526 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3527 if (nunits_in < nunits_out)
3528 modifier = NARROW;
3529 else if (nunits_out == nunits_in)
3530 modifier = NONE;
3531 else
3532 modifier = WIDEN;
3534 /* Multiple types in SLP are handled by creating the appropriate number of
3535 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3536 case of SLP. */
3537 if (slp_node || PURE_SLP_STMT (stmt_info))
3538 ncopies = 1;
3539 else if (modifier == NARROW)
3540 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3541 else
3542 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3544 /* Sanity check: make sure that at least one copy of the vectorized stmt
3545 needs to be generated. */
3546 gcc_assert (ncopies >= 1);
3548 /* Supportable by target? */
3549 switch (modifier)
3551 case NONE:
3552 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3553 return false;
3554 if (supportable_convert_operation (code, vectype_out, vectype_in,
3555 &decl1, &code1))
3556 break;
3557 /* FALLTHRU */
3558 unsupported:
3559 if (dump_enabled_p ())
3560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3561 "conversion not supported by target.\n");
3562 return false;
3564 case WIDEN:
3565 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3566 &code1, &code2, &multi_step_cvt,
3567 &interm_types))
3569 /* Binary widening operation can only be supported directly by the
3570 architecture. */
3571 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3572 break;
3575 if (code != FLOAT_EXPR
3576 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3577 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3578 goto unsupported;
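/* A widening FLOAT_EXPR whose integer input is narrower than the float
   result may need an intermediate step: try successively wider integer
   modes as CVT_TYPE, widening the input integers to CVT_TYPE first and
   then converting CVT_TYPE to the float vector type.  */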
3580 rhs_mode = TYPE_MODE (rhs_type);
3581 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3582 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3583 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3584 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3586 cvt_type
3587 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3588 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3589 if (cvt_type == NULL_TREE)
3590 goto unsupported;
3592 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3594 if (!supportable_convert_operation (code, vectype_out,
3595 cvt_type, &decl1, &codecvt1))
3596 goto unsupported;
3598 else if (!supportable_widening_operation (code, stmt, vectype_out,
3599 cvt_type, &codecvt1,
3600 &codecvt2, &multi_step_cvt,
3601 &interm_types))
3602 continue;
3603 else
3604 gcc_assert (multi_step_cvt == 0);
3606 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3607 vectype_in, &code1, &code2,
3608 &multi_step_cvt, &interm_types))
3609 break;
3612 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3613 goto unsupported;
3615 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3616 codecvt2 = ERROR_MARK;
3617 else
3619 multi_step_cvt++;
3620 interm_types.safe_push (cvt_type);
3621 cvt_type = NULL_TREE;
3623 break;
3625 case NARROW:
3626 gcc_assert (op_type == unary_op);
3627 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3628 &code1, &multi_step_cvt,
3629 &interm_types))
3630 break;
3632 if (code != FIX_TRUNC_EXPR
3633 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3634 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3635 goto unsupported;
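/* A narrowing FIX_TRUNC_EXPR whose integer result is narrower than the
   float source goes via CVT_TYPE: first convert the float vectors to
   same-width integers, then narrow those integers to the final type.  */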
3637 rhs_mode = TYPE_MODE (rhs_type);
3638 cvt_type
3639 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3640 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3641 if (cvt_type == NULL_TREE)
3642 goto unsupported;
3643 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3644 &decl1, &codecvt1))
3645 goto unsupported;
3646 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3647 &code1, &multi_step_cvt,
3648 &interm_types))
3649 break;
3650 goto unsupported;
3652 default:
3653 gcc_unreachable ();
3656 if (!vec_stmt) /* transformation not required. */
3658 if (dump_enabled_p ())
3659 dump_printf_loc (MSG_NOTE, vect_location,
3660 "=== vectorizable_conversion ===\n");
3661 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3663 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3664 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3666 else if (modifier == NARROW)
3668 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3669 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3671 else
3673 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3674 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3676 interm_types.release ();
3677 return true;
3680 /** Transform. **/
3681 if (dump_enabled_p ())
3682 dump_printf_loc (MSG_NOTE, vect_location,
3683 "transform conversion. ncopies = %d.\n", ncopies);
3685 if (op_type == binary_op)
3687 if (CONSTANT_CLASS_P (op0))
3688 op0 = fold_convert (TREE_TYPE (op1), op0);
3689 else if (CONSTANT_CLASS_P (op1))
3690 op1 = fold_convert (TREE_TYPE (op0), op1);
3693 /* In case of multi-step conversion, we first generate conversion operations
3694 to the intermediate types, and then from those types to the final one.
3695 We create vector destinations for the intermediate type (TYPES) received
3696 from supportable_*_operation, and store them in the correct order
3697 for future use in vect_create_vectorized_*_stmts (). */
3698 vec_dsts.create (multi_step_cvt + 1);
3699 vec_dest = vect_create_destination_var (scalar_dest,
3700 (cvt_type && modifier == WIDEN)
3701 ? cvt_type : vectype_out);
3702 vec_dsts.quick_push (vec_dest);
3704 if (multi_step_cvt)
3706 for (i = interm_types.length () - 1;
3707 interm_types.iterate (i, &intermediate_type); i--)
3709 vec_dest = vect_create_destination_var (scalar_dest,
3710 intermediate_type);
3711 vec_dsts.quick_push (vec_dest);
3715 if (cvt_type)
3716 vec_dest = vect_create_destination_var (scalar_dest,
3717 modifier == WIDEN
3718 ? vectype_out : cvt_type);
3720 if (!slp_node)
3722 if (modifier == WIDEN)
3724 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3725 if (op_type == binary_op)
3726 vec_oprnds1.create (1);
3728 else if (modifier == NARROW)
3729 vec_oprnds0.create (
3730 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3732 else if (code == WIDEN_LSHIFT_EXPR)
3733 vec_oprnds1.create (slp_node->vec_stmts_size);
3735 last_oprnd = op0;
3736 prev_stmt_info = NULL;
3737 switch (modifier)
3739 case NONE:
3740 for (j = 0; j < ncopies; j++)
3742 if (j == 0)
3743 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3744 -1);
3745 else
3746 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3748 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3750 /* Arguments are ready, create the new vector stmt. */
3751 if (code1 == CALL_EXPR)
3753 new_stmt = gimple_build_call (decl1, 1, vop0);
3754 new_temp = make_ssa_name (vec_dest, new_stmt);
3755 gimple_call_set_lhs (new_stmt, new_temp);
3757 else
3759 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3760 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3761 vop0, NULL);
3762 new_temp = make_ssa_name (vec_dest, new_stmt);
3763 gimple_assign_set_lhs (new_stmt, new_temp);
3766 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3767 if (slp_node)
3768 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3771 if (j == 0)
3772 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3773 else
3774 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3775 prev_stmt_info = vinfo_for_stmt (new_stmt);
3777 break;
3779 case WIDEN:
3780 /* In case the vectorization factor (VF) is bigger than the number
3781 of elements that we can fit in a vectype (nunits), we have to
3782 generate more than one vector stmt - i.e., we need to "unroll"
3783 the vector stmt by a factor VF/nunits. */
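/* E.g. with VF = 16 and nunits = 4, four copies are created and chained
   through STMT_VINFO_RELATED_STMT.  Each widening step typically emits a
   lo/hi pair of statements per input vector
   (see vect_create_vectorized_promotion_stmts).  */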
3784 for (j = 0; j < ncopies; j++)
3786 /* Handle uses. */
3787 if (j == 0)
3789 if (slp_node)
3791 if (code == WIDEN_LSHIFT_EXPR)
3793 unsigned int k;
3795 vec_oprnd1 = op1;
3796 /* Store vec_oprnd1 for every vector stmt to be created
3797 for SLP_NODE. We check during the analysis that all
3798 the shift arguments are the same. */
3799 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3800 vec_oprnds1.quick_push (vec_oprnd1);
3802 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3803 slp_node, -1);
3805 else
3806 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3807 &vec_oprnds1, slp_node, -1);
3809 else
3811 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3812 vec_oprnds0.quick_push (vec_oprnd0);
3813 if (op_type == binary_op)
3815 if (code == WIDEN_LSHIFT_EXPR)
3816 vec_oprnd1 = op1;
3817 else
3818 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3819 NULL);
3820 vec_oprnds1.quick_push (vec_oprnd1);
3824 else
3826 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3827 vec_oprnds0.truncate (0);
3828 vec_oprnds0.quick_push (vec_oprnd0);
3829 if (op_type == binary_op)
3831 if (code == WIDEN_LSHIFT_EXPR)
3832 vec_oprnd1 = op1;
3833 else
3834 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3835 vec_oprnd1);
3836 vec_oprnds1.truncate (0);
3837 vec_oprnds1.quick_push (vec_oprnd1);
3841 /* Arguments are ready. Create the new vector stmts. */
3842 for (i = multi_step_cvt; i >= 0; i--)
3844 tree this_dest = vec_dsts[i];
3845 enum tree_code c1 = code1, c2 = code2;
3846 if (i == 0 && codecvt2 != ERROR_MARK)
3848 c1 = codecvt1;
3849 c2 = codecvt2;
3851 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3852 &vec_oprnds1,
3853 stmt, this_dest, gsi,
3854 c1, c2, decl1, decl2,
3855 op_type);
3858 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3860 if (cvt_type)
3862 if (codecvt1 == CALL_EXPR)
3864 new_stmt = gimple_build_call (decl1, 1, vop0);
3865 new_temp = make_ssa_name (vec_dest, new_stmt);
3866 gimple_call_set_lhs (new_stmt, new_temp);
3868 else
3870 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3871 new_temp = make_ssa_name (vec_dest, NULL);
3872 new_stmt = gimple_build_assign_with_ops (codecvt1,
3873 new_temp,
3874 vop0, NULL);
3877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3879 else
3880 new_stmt = SSA_NAME_DEF_STMT (vop0);
3882 if (slp_node)
3883 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3884 else
3886 if (!prev_stmt_info)
3887 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3888 else
3889 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3890 prev_stmt_info = vinfo_for_stmt (new_stmt);
3895 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3896 break;
3898 case NARROW:
3899 /* In case the vectorization factor (VF) is bigger than the number
3900 of elements that we can fit in a vectype (nunits), we have to
3901 generate more than one vector stmt - i.e., we need to "unroll"
3902 the vector stmt by a factor VF/nunits. */
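/* E.g. narrowing ints to shorts packs two input vectors into one output
   vector (typically via VEC_PACK_TRUNC_EXPR), which is why VEC_OPRNDS0
   above is sized for twice the usual number of operands per copy.  */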
3903 for (j = 0; j < ncopies; j++)
3905 /* Handle uses. */
3906 if (slp_node)
3907 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3908 slp_node, -1);
3909 else
3911 vec_oprnds0.truncate (0);
3912 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3913 vect_pow2 (multi_step_cvt) - 1);
3916 /* Arguments are ready. Create the new vector stmts. */
3917 if (cvt_type)
3918 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3920 if (codecvt1 == CALL_EXPR)
3922 new_stmt = gimple_build_call (decl1, 1, vop0);
3923 new_temp = make_ssa_name (vec_dest, new_stmt);
3924 gimple_call_set_lhs (new_stmt, new_temp);
3926 else
3928 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3929 new_temp = make_ssa_name (vec_dest, NULL);
3930 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3931 vop0, NULL);
3934 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3935 vec_oprnds0[i] = new_temp;
3938 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3939 stmt, vec_dsts, gsi,
3940 slp_node, code1,
3941 &prev_stmt_info);
3944 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3945 break;
3948 vec_oprnds0.release ();
3949 vec_oprnds1.release ();
3950 vec_dsts.release ();
3951 interm_types.release ();
3953 return true;
3957 /* Function vectorizable_assignment.
3959 Check if STMT performs an assignment (copy) that can be vectorized.
3960 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3961 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3962 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3964 static bool
3965 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3966 gimple *vec_stmt, slp_tree slp_node)
3968 tree vec_dest;
3969 tree scalar_dest;
3970 tree op;
3971 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3972 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3973 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3974 tree new_temp;
3975 tree def;
3976 gimple def_stmt;
3977 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3978 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3979 int ncopies;
3980 int i, j;
3981 vec<tree> vec_oprnds = vNULL;
3982 tree vop;
3983 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3984 gimple new_stmt = NULL;
3985 stmt_vec_info prev_stmt_info = NULL;
3986 enum tree_code code;
3987 tree vectype_in;
3989 /* Multiple types in SLP are handled by creating the appropriate number of
3990 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3991 case of SLP. */
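/* E.g. with VF = 8 and nunits = 4 a loop statement needs NCOPIES = 2,
   while an SLP node records its replication in SLP_TREE_NUMBER_OF_VEC_STMTS
   instead, so NCOPIES stays 1.  */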
3992 if (slp_node || PURE_SLP_STMT (stmt_info))
3993 ncopies = 1;
3994 else
3995 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3997 gcc_assert (ncopies >= 1);
3999 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4000 return false;
4002 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4003 return false;
4005 /* Is vectorizable assignment? */
4006 if (!is_gimple_assign (stmt))
4007 return false;
4009 scalar_dest = gimple_assign_lhs (stmt);
4010 if (TREE_CODE (scalar_dest) != SSA_NAME)
4011 return false;
4013 code = gimple_assign_rhs_code (stmt);
4014 if (gimple_assign_single_p (stmt)
4015 || code == PAREN_EXPR
4016 || CONVERT_EXPR_CODE_P (code))
4017 op = gimple_assign_rhs1 (stmt);
4018 else
4019 return false;
4021 if (code == VIEW_CONVERT_EXPR)
4022 op = TREE_OPERAND (op, 0);
4024 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4025 &def_stmt, &def, &dt[0], &vectype_in))
4027 if (dump_enabled_p ())
4028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4029 "use not simple.\n");
4030 return false;
4033 /* We can handle NOP_EXPR conversions that do not change the number
4034 of elements or the vector size. */
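/* E.g. a cast from int to unsigned int (same mode, same number of elements)
   is vectorized below as a VIEW_CONVERT_EXPR of the input vector, i.e.
   effectively a copy; conversions that change the element width are
   rejected here and handled by vectorizable_conversion instead.  */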
4035 if ((CONVERT_EXPR_CODE_P (code)
4036 || code == VIEW_CONVERT_EXPR)
4037 && (!vectype_in
4038 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4039 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4040 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4041 return false;
4043 /* We do not handle bit-precision changes. */
4044 if ((CONVERT_EXPR_CODE_P (code)
4045 || code == VIEW_CONVERT_EXPR)
4046 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4047 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4048 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4049 || ((TYPE_PRECISION (TREE_TYPE (op))
4050 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4051 /* But a conversion that does not change the bit-pattern is ok. */
4052 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4053 > TYPE_PRECISION (TREE_TYPE (op)))
4054 && TYPE_UNSIGNED (TREE_TYPE (op))))
4056 if (dump_enabled_p ())
4057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4058 "type conversion to/from bit-precision "
4059 "unsupported.\n");
4060 return false;
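/* For instance, copying a 32-bit value into a destination whose type has
   TYPE_PRECISION 24 would need an extra truncation after the vector copy,
   so it is rejected; a conversion that only widens an unsigned (thus
   zero-extended) source does not change the bit pattern and is accepted.  */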
4063 if (!vec_stmt) /* transformation not required. */
4065 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4066 if (dump_enabled_p ())
4067 dump_printf_loc (MSG_NOTE, vect_location,
4068 "=== vectorizable_assignment ===\n");
4069 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4070 return true;
4073 /** Transform. **/
4074 if (dump_enabled_p ())
4075 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4077 /* Handle def. */
4078 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4080 /* Handle use. */
4081 for (j = 0; j < ncopies; j++)
4083 /* Handle uses. */
4084 if (j == 0)
4085 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4086 else
4087 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4089 /* Arguments are ready. create the new vector stmt. */
4090 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4092 if (CONVERT_EXPR_CODE_P (code)
4093 || code == VIEW_CONVERT_EXPR)
4094 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4095 new_stmt = gimple_build_assign (vec_dest, vop);
4096 new_temp = make_ssa_name (vec_dest, new_stmt);
4097 gimple_assign_set_lhs (new_stmt, new_temp);
4098 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4099 if (slp_node)
4100 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4103 if (slp_node)
4104 continue;
4106 if (j == 0)
4107 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4108 else
4109 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4111 prev_stmt_info = vinfo_for_stmt (new_stmt);
4114 vec_oprnds.release ();
4115 return true;
4119 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4120 either as shift by a scalar or by a vector. */
4122 bool
4123 vect_supportable_shift (enum tree_code code, tree scalar_type)
4126 enum machine_mode vec_mode;
4127 optab optab;
4128 int icode;
4129 tree vectype;
4131 vectype = get_vectype_for_scalar_type (scalar_type);
4132 if (!vectype)
4133 return false;
4135 optab = optab_for_tree_code (code, vectype, optab_scalar);
4136 if (!optab
4137 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4139 optab = optab_for_tree_code (code, vectype, optab_vector);
4140 if (!optab
4141 || (optab_handler (optab, TYPE_MODE (vectype))
4142 == CODE_FOR_nothing))
4143 return false;
4146 vec_mode = TYPE_MODE (vectype);
4147 icode = (int) optab_handler (optab, vec_mode);
4148 if (icode == CODE_FOR_nothing)
4149 return false;
4151 return true;
4155 /* Function vectorizable_shift.
4157 Check if STMT performs a shift operation that can be vectorized.
4158 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4159 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4160 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4162 static bool
4163 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4164 gimple *vec_stmt, slp_tree slp_node)
4166 tree vec_dest;
4167 tree scalar_dest;
4168 tree op0, op1 = NULL;
4169 tree vec_oprnd1 = NULL_TREE;
4170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4171 tree vectype;
4172 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4173 enum tree_code code;
4174 enum machine_mode vec_mode;
4175 tree new_temp;
4176 optab optab;
4177 int icode;
4178 enum machine_mode optab_op2_mode;
4179 tree def;
4180 gimple def_stmt;
4181 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4182 gimple new_stmt = NULL;
4183 stmt_vec_info prev_stmt_info;
4184 int nunits_in;
4185 int nunits_out;
4186 tree vectype_out;
4187 tree op1_vectype;
4188 int ncopies;
4189 int j, i;
4190 vec<tree> vec_oprnds0 = vNULL;
4191 vec<tree> vec_oprnds1 = vNULL;
4192 tree vop0, vop1;
4193 unsigned int k;
4194 bool scalar_shift_arg = true;
4195 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4196 int vf;
4198 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4199 return false;
4201 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4202 return false;
4204 /* Is STMT a vectorizable binary/unary operation? */
4205 if (!is_gimple_assign (stmt))
4206 return false;
4208 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4209 return false;
4211 code = gimple_assign_rhs_code (stmt);
4213 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4214 || code == RROTATE_EXPR))
4215 return false;
4217 scalar_dest = gimple_assign_lhs (stmt);
4218 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4219 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4220 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4222 if (dump_enabled_p ())
4223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4224 "bit-precision shifts not supported.\n");
4225 return false;
4228 op0 = gimple_assign_rhs1 (stmt);
4229 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4230 &def_stmt, &def, &dt[0], &vectype))
4232 if (dump_enabled_p ())
4233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4234 "use not simple.\n");
4235 return false;
4237 /* If op0 is an external or constant def use a vector type with
4238 the same size as the output vector type. */
4239 if (!vectype)
4240 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4241 if (vec_stmt)
4242 gcc_assert (vectype);
4243 if (!vectype)
4245 if (dump_enabled_p ())
4246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4247 "no vectype for scalar type\n");
4248 return false;
4251 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4252 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4253 if (nunits_out != nunits_in)
4254 return false;
4256 op1 = gimple_assign_rhs2 (stmt);
4257 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4258 &def, &dt[1], &op1_vectype))
4260 if (dump_enabled_p ())
4261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4262 "use not simple.\n");
4263 return false;
4266 if (loop_vinfo)
4267 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4268 else
4269 vf = 1;
4271 /* Multiple types in SLP are handled by creating the appropriate number of
4272 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4273 case of SLP. */
4274 if (slp_node || PURE_SLP_STMT (stmt_info))
4275 ncopies = 1;
4276 else
4277 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4279 gcc_assert (ncopies >= 1);
4281 /* Determine whether the shift amount is a vector, or scalar. If the
4282 shift/rotate amount is a vector, use the vector/vector shift optabs. */
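/* E.g. "a[i] << b[i]" has a loop-varying count and needs the vector/vector
   optab, while "a[i] << n" with a constant or invariant N can keep the
   count as a scalar and use the vector/scalar form.  */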
4284 if (dt[1] == vect_internal_def && !slp_node)
4285 scalar_shift_arg = false;
4286 else if (dt[1] == vect_constant_def
4287 || dt[1] == vect_external_def
4288 || dt[1] == vect_internal_def)
4290 /* In SLP we need to check whether the shift count is the same for
4291 all statements; in a loop, a constant or invariant count is always
4292 a scalar shift. */
4293 if (slp_node)
4295 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4296 gimple slpstmt;
4298 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4299 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4300 scalar_shift_arg = false;
4303 else
4305 if (dump_enabled_p ())
4306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4307 "operand mode requires invariant argument.\n");
4308 return false;
4311 /* Vector shifted by vector. */
4312 if (!scalar_shift_arg)
4314 optab = optab_for_tree_code (code, vectype, optab_vector);
4315 if (dump_enabled_p ())
4316 dump_printf_loc (MSG_NOTE, vect_location,
4317 "vector/vector shift/rotate found.\n");
4319 if (!op1_vectype)
4320 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4321 if (op1_vectype == NULL_TREE
4322 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4324 if (dump_enabled_p ())
4325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4326 "unusable type for last operand in"
4327 " vector/vector shift/rotate.\n");
4328 return false;
4331 /* See if the machine has a vector-shift-by-scalar insn, and if not,
4332 whether it has a vector-shift-by-vector insn. */
4333 else
4335 optab = optab_for_tree_code (code, vectype, optab_scalar);
4336 if (optab
4337 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4339 if (dump_enabled_p ())
4340 dump_printf_loc (MSG_NOTE, vect_location,
4341 "vector/scalar shift/rotate found.\n");
4343 else
4345 optab = optab_for_tree_code (code, vectype, optab_vector);
4346 if (optab
4347 && (optab_handler (optab, TYPE_MODE (vectype))
4348 != CODE_FOR_nothing))
4350 scalar_shift_arg = false;
4352 if (dump_enabled_p ())
4353 dump_printf_loc (MSG_NOTE, vect_location,
4354 "vector/vector shift/rotate found.\n");
4356 /* Unlike the other binary operators, shifts/rotates have
4357 the rhs being int, instead of the same type as the lhs,
4358 so make sure the scalar is the right type if we are
4359 dealing with vectors of long long/long/short/char. */
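/* E.g. for "long long x >> 3" the count 3 has type int; it is folded (and,
   at transform time, broadcast via vect_init_vector) to the long long
   element type so the vector/vector shift sees matching element types.  */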
4360 if (dt[1] == vect_constant_def)
4361 op1 = fold_convert (TREE_TYPE (vectype), op1);
4362 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4363 TREE_TYPE (op1)))
4365 if (slp_node
4366 && TYPE_MODE (TREE_TYPE (vectype))
4367 != TYPE_MODE (TREE_TYPE (op1)))
4369 if (dump_enabled_p ())
4370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4371 "unusable type for last operand in"
4372 " vector/vector shift/rotate.\n");
4373 return false;
4375 if (vec_stmt && !slp_node)
4377 op1 = fold_convert (TREE_TYPE (vectype), op1);
4378 op1 = vect_init_vector (stmt, op1,
4379 TREE_TYPE (vectype), NULL);
4386 /* Supportable by target? */
4387 if (!optab)
4389 if (dump_enabled_p ())
4390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4391 "no optab.\n");
4392 return false;
4394 vec_mode = TYPE_MODE (vectype);
4395 icode = (int) optab_handler (optab, vec_mode);
4396 if (icode == CODE_FOR_nothing)
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4400 "op not supported by target.\n");
4401 /* Check only during analysis. */
4402 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4403 || (vf < vect_min_worthwhile_factor (code)
4404 && !vec_stmt))
4405 return false;
4406 if (dump_enabled_p ())
4407 dump_printf_loc (MSG_NOTE, vect_location,
4408 "proceeding using word mode.\n");
4411 /* Worthwhile without SIMD support? Check only during analysis. */
4412 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4413 && vf < vect_min_worthwhile_factor (code)
4414 && !vec_stmt)
4416 if (dump_enabled_p ())
4417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4418 "not worthwhile without SIMD support.\n");
4419 return false;
4422 if (!vec_stmt) /* transformation not required. */
4424 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4425 if (dump_enabled_p ())
4426 dump_printf_loc (MSG_NOTE, vect_location,
4427 "=== vectorizable_shift ===\n");
4428 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4429 return true;
4432 /** Transform. **/
4434 if (dump_enabled_p ())
4435 dump_printf_loc (MSG_NOTE, vect_location,
4436 "transform binary/unary operation.\n");
4438 /* Handle def. */
4439 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4441 prev_stmt_info = NULL;
4442 for (j = 0; j < ncopies; j++)
4444 /* Handle uses. */
4445 if (j == 0)
4447 if (scalar_shift_arg)
4449 /* Vector shl and shr insn patterns can be defined with scalar
4450 operand 2 (shift operand). In this case, use constant or loop
4451 invariant op1 directly, without extending it to vector mode
4452 first. */
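/* On many targets the vector shift patterns take the count as a scalar
   operand; OPTAB_OP2_MODE below tells us whether that is the case, and if
   so the invariant OP1 is used as-is rather than broadcast into a vector.  */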
4453 optab_op2_mode = insn_data[icode].operand[2].mode;
4454 if (!VECTOR_MODE_P (optab_op2_mode))
4456 if (dump_enabled_p ())
4457 dump_printf_loc (MSG_NOTE, vect_location,
4458 "operand 1 using scalar mode.\n");
4459 vec_oprnd1 = op1;
4460 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4461 vec_oprnds1.quick_push (vec_oprnd1);
4462 if (slp_node)
4464 /* Store vec_oprnd1 for every vector stmt to be created
4465 for SLP_NODE. We check during the analysis that all
4466 the shift arguments are the same.
4467 TODO: Allow different constants for different vector
4468 stmts generated for an SLP instance. */
4469 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4470 vec_oprnds1.quick_push (vec_oprnd1);
4475 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4476 (a special case for certain kind of vector shifts); otherwise,
4477 operand 1 should be of a vector type (the usual case). */
4478 if (vec_oprnd1)
4479 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4480 slp_node, -1);
4481 else
4482 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4483 slp_node, -1);
4485 else
4486 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4488 /* Arguments are ready. Create the new vector stmt. */
4489 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4491 vop1 = vec_oprnds1[i];
4492 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4493 new_temp = make_ssa_name (vec_dest, new_stmt);
4494 gimple_assign_set_lhs (new_stmt, new_temp);
4495 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4496 if (slp_node)
4497 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4500 if (slp_node)
4501 continue;
4503 if (j == 0)
4504 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4505 else
4506 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4507 prev_stmt_info = vinfo_for_stmt (new_stmt);
4510 vec_oprnds0.release ();
4511 vec_oprnds1.release ();
4513 return true;
4517 /* Function vectorizable_operation.
4519 Check if STMT performs a binary, unary or ternary operation that can
4520 be vectorized.
4521 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4522 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4523 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4525 static bool
4526 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4527 gimple *vec_stmt, slp_tree slp_node)
4529 tree vec_dest;
4530 tree scalar_dest;
4531 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4533 tree vectype;
4534 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4535 enum tree_code code;
4536 enum machine_mode vec_mode;
4537 tree new_temp;
4538 int op_type;
4539 optab optab;
4540 int icode;
4541 tree def;
4542 gimple def_stmt;
4543 enum vect_def_type dt[3]
4544 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4545 gimple new_stmt = NULL;
4546 stmt_vec_info prev_stmt_info;
4547 int nunits_in;
4548 int nunits_out;
4549 tree vectype_out;
4550 int ncopies;
4551 int j, i;
4552 vec<tree> vec_oprnds0 = vNULL;
4553 vec<tree> vec_oprnds1 = vNULL;
4554 vec<tree> vec_oprnds2 = vNULL;
4555 tree vop0, vop1, vop2;
4556 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4557 int vf;
4559 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4560 return false;
4562 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4563 return false;
4565 /* Is STMT a vectorizable binary/unary operation? */
4566 if (!is_gimple_assign (stmt))
4567 return false;
4569 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4570 return false;
4572 code = gimple_assign_rhs_code (stmt);
4574 /* For pointer addition, we should use the normal plus for
4575 the vector addition. */
4576 if (code == POINTER_PLUS_EXPR)
4577 code = PLUS_EXPR;
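/* E.g. "q_2 = p_1 + 4" (POINTER_PLUS_EXPR) has no vector counterpart, so
   the vector statement is built with an ordinary PLUS_EXPR on the
   pointer-sized elements.  */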
4579 /* Support only unary or binary operations. */
4580 op_type = TREE_CODE_LENGTH (code);
4581 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4583 if (dump_enabled_p ())
4584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4585 "num. args = %d (not unary/binary/ternary op).\n",
4586 op_type);
4587 return false;
4590 scalar_dest = gimple_assign_lhs (stmt);
4591 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4593 /* Most operations cannot handle bit-precision types without extra
4594 truncations. */
4595 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4596 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4597 /* Exception are bitwise binary operations. */
4598 && code != BIT_IOR_EXPR
4599 && code != BIT_XOR_EXPR
4600 && code != BIT_AND_EXPR)
4602 if (dump_enabled_p ())
4603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4604 "bit-precision arithmetic not supported.\n");
4605 return false;
4608 op0 = gimple_assign_rhs1 (stmt);
4609 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4610 &def_stmt, &def, &dt[0], &vectype))
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4614 "use not simple.\n");
4615 return false;
4617 /* If op0 is an external or constant def use a vector type with
4618 the same size as the output vector type. */
4619 if (!vectype)
4620 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4621 if (vec_stmt)
4622 gcc_assert (vectype);
4623 if (!vectype)
4625 if (dump_enabled_p ())
4627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4628 "no vectype for scalar type ");
4629 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4630 TREE_TYPE (op0));
4631 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4634 return false;
4637 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4638 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4639 if (nunits_out != nunits_in)
4640 return false;
4642 if (op_type == binary_op || op_type == ternary_op)
4644 op1 = gimple_assign_rhs2 (stmt);
4645 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4646 &def, &dt[1]))
4648 if (dump_enabled_p ())
4649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4650 "use not simple.\n");
4651 return false;
4654 if (op_type == ternary_op)
4656 op2 = gimple_assign_rhs3 (stmt);
4657 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4658 &def, &dt[2]))
4660 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4662 "use not simple.\n");
4663 return false;
4667 if (loop_vinfo)
4668 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4669 else
4670 vf = 1;
4672 /* Multiple types in SLP are handled by creating the appropriate number of
4673 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4674 case of SLP. */
4675 if (slp_node || PURE_SLP_STMT (stmt_info))
4676 ncopies = 1;
4677 else
4678 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4680 gcc_assert (ncopies >= 1);
4682 /* Shifts are handled in vectorizable_shift (). */
4683 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4684 || code == RROTATE_EXPR)
4685 return false;
4687 /* Supportable by target? */
4689 vec_mode = TYPE_MODE (vectype);
4690 if (code == MULT_HIGHPART_EXPR)
4692 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4693 icode = LAST_INSN_CODE;
4694 else
4695 icode = CODE_FOR_nothing;
4697 else
4699 optab = optab_for_tree_code (code, vectype, optab_default);
4700 if (!optab)
4702 if (dump_enabled_p ())
4703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4704 "no optab.\n");
4705 return false;
4707 icode = (int) optab_handler (optab, vec_mode);
4710 if (icode == CODE_FOR_nothing)
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4714 "op not supported by target.\n");
4715 /* Check only during analysis. */
4716 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4717 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4718 return false;
4719 if (dump_enabled_p ())
4720 dump_printf_loc (MSG_NOTE, vect_location,
4721 "proceeding using word mode.\n");
4724 /* Worthwhile without SIMD support? Check only during analysis. */
4725 if (!VECTOR_MODE_P (vec_mode)
4726 && !vec_stmt
4727 && vf < vect_min_worthwhile_factor (code))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4731 "not worthwhile without SIMD support.\n");
4732 return false;
4735 if (!vec_stmt) /* transformation not required. */
4737 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4738 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_NOTE, vect_location,
4740 "=== vectorizable_operation ===\n");
4741 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4742 return true;
4745 /** Transform. **/
4747 if (dump_enabled_p ())
4748 dump_printf_loc (MSG_NOTE, vect_location,
4749 "transform binary/unary operation.\n");
4751 /* Handle def. */
4752 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4754 /* In case the vectorization factor (VF) is bigger than the number
4755 of elements that we can fit in a vectype (nunits), we have to generate
4756 more than one vector stmt - i.e., we need to "unroll" the
4757 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4758 from one copy of the vector stmt to the next, in the field
4759 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4760 stages to find the correct vector defs to be used when vectorizing
4761 stmts that use the defs of the current stmt. The example below
4762 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4763 we need to create 4 vectorized stmts):
4765 before vectorization:
4766 RELATED_STMT VEC_STMT
4767 S1: x = memref - -
4768 S2: z = x + 1 - -
4770 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4771 there):
4772 RELATED_STMT VEC_STMT
4773 VS1_0: vx0 = memref0 VS1_1 -
4774 VS1_1: vx1 = memref1 VS1_2 -
4775 VS1_2: vx2 = memref2 VS1_3 -
4776 VS1_3: vx3 = memref3 - -
4777 S1: x = load - VS1_0
4778 S2: z = x + 1 - -
4780 step2: vectorize stmt S2 (done here):
4781 To vectorize stmt S2 we first need to find the relevant vector
4782 def for the first operand 'x'. This is, as usual, obtained from
4783 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4784 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4785 relevant vector def 'vx0'. Having found 'vx0' we can generate
4786 the vector stmt VS2_0, and as usual, record it in the
4787 STMT_VINFO_VEC_STMT of stmt S2.
4788 When creating the second copy (VS2_1), we obtain the relevant vector
4789 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4790 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4791 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4792 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4793 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4794 chain of stmts and pointers:
4795 RELATED_STMT VEC_STMT
4796 VS1_0: vx0 = memref0 VS1_1 -
4797 VS1_1: vx1 = memref1 VS1_2 -
4798 VS1_2: vx2 = memref2 VS1_3 -
4799 VS1_3: vx3 = memref3 - -
4800 S1: x = load - VS1_0
4801 VS2_0: vz0 = vx0 + v1 VS2_1 -
4802 VS2_1: vz1 = vx1 + v1 VS2_2 -
4803 VS2_2: vz2 = vx2 + v1 VS2_3 -
4804 VS2_3: vz3 = vx3 + v1 - -
4805 S2: z = x + 1 - VS2_0 */
4807 prev_stmt_info = NULL;
4808 for (j = 0; j < ncopies; j++)
4810 /* Handle uses. */
4811 if (j == 0)
4813 if (op_type == binary_op || op_type == ternary_op)
4814 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4815 slp_node, -1);
4816 else
4817 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4818 slp_node, -1);
4819 if (op_type == ternary_op)
4821 vec_oprnds2.create (1);
4822 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4823 stmt,
4824 NULL));
4827 else
4829 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4830 if (op_type == ternary_op)
4832 tree vec_oprnd = vec_oprnds2.pop ();
4833 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4834 vec_oprnd));
4838 /* Arguments are ready. Create the new vector stmt. */
4839 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4841 vop1 = ((op_type == binary_op || op_type == ternary_op)
4842 ? vec_oprnds1[i] : NULL_TREE);
4843 vop2 = ((op_type == ternary_op)
4844 ? vec_oprnds2[i] : NULL_TREE);
4845 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4846 vop0, vop1, vop2);
4847 new_temp = make_ssa_name (vec_dest, new_stmt);
4848 gimple_assign_set_lhs (new_stmt, new_temp);
4849 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4850 if (slp_node)
4851 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4854 if (slp_node)
4855 continue;
4857 if (j == 0)
4858 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4859 else
4860 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4861 prev_stmt_info = vinfo_for_stmt (new_stmt);
4864 vec_oprnds0.release ();
4865 vec_oprnds1.release ();
4866 vec_oprnds2.release ();
4868 return true;
4871 /* A helper function to ensure data reference DR's base alignment
4872 for STMT_INFO. */
4874 static void
4875 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4877 if (!dr->aux)
4878 return;
4880 if (((dataref_aux *)dr->aux)->base_misaligned)
4882 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4883 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4885 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4886 DECL_USER_ALIGN (base_decl) = 1;
4887 ((dataref_aux *)dr->aux)->base_misaligned = false;
4892 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4893 reversal of the vector elements. If that is impossible to do,
4894 returns NULL. */
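/* For example, for a 4-element vector the selector built below is
   { 3, 2, 1, 0 }.  */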
4896 static tree
4897 perm_mask_for_reverse (tree vectype)
4899 int i, nunits;
4900 unsigned char *sel;
4902 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4903 sel = XALLOCAVEC (unsigned char, nunits);
4905 for (i = 0; i < nunits; ++i)
4906 sel[i] = nunits - 1 - i;
4908 return vect_gen_perm_mask (vectype, sel);
4911 /* Function vectorizable_store.
4913 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4914 can be vectorized.
4915 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4916 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4917 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4919 static bool
4920 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4921 slp_tree slp_node)
4923 tree scalar_dest;
4924 tree data_ref;
4925 tree op;
4926 tree vec_oprnd = NULL_TREE;
4927 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4928 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4929 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4930 tree elem_type;
4931 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4932 struct loop *loop = NULL;
4933 enum machine_mode vec_mode;
4934 tree dummy;
4935 enum dr_alignment_support alignment_support_scheme;
4936 tree def;
4937 gimple def_stmt;
4938 enum vect_def_type dt;
4939 stmt_vec_info prev_stmt_info = NULL;
4940 tree dataref_ptr = NULL_TREE;
4941 tree dataref_offset = NULL_TREE;
4942 gimple ptr_incr = NULL;
4943 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4944 int ncopies;
4945 int j;
4946 gimple next_stmt, first_stmt = NULL;
4947 bool grouped_store = false;
4948 bool store_lanes_p = false;
4949 unsigned int group_size, i;
4950 vec<tree> dr_chain = vNULL;
4951 vec<tree> oprnds = vNULL;
4952 vec<tree> result_chain = vNULL;
4953 bool inv_p;
4954 bool negative = false;
4955 tree offset = NULL_TREE;
4956 vec<tree> vec_oprnds = vNULL;
4957 bool slp = (slp_node != NULL);
4958 unsigned int vec_num;
4959 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4960 tree aggr_type;
4962 if (loop_vinfo)
4963 loop = LOOP_VINFO_LOOP (loop_vinfo);
4965 /* Multiple types in SLP are handled by creating the appropriate number of
4966 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4967 case of SLP. */
4968 if (slp || PURE_SLP_STMT (stmt_info))
4969 ncopies = 1;
4970 else
4971 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4973 gcc_assert (ncopies >= 1);
4975 /* FORNOW. This restriction should be relaxed. */
4976 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4978 if (dump_enabled_p ())
4979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4980 "multiple types in nested loop.\n");
4981 return false;
4984 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4985 return false;
4987 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4988 return false;
4990 /* Is vectorizable store? */
4992 if (!is_gimple_assign (stmt))
4993 return false;
4995 scalar_dest = gimple_assign_lhs (stmt);
4996 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
4997 && is_pattern_stmt_p (stmt_info))
4998 scalar_dest = TREE_OPERAND (scalar_dest, 0);
4999 if (TREE_CODE (scalar_dest) != ARRAY_REF
5000 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5001 && TREE_CODE (scalar_dest) != INDIRECT_REF
5002 && TREE_CODE (scalar_dest) != COMPONENT_REF
5003 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5004 && TREE_CODE (scalar_dest) != REALPART_EXPR
5005 && TREE_CODE (scalar_dest) != MEM_REF)
5006 return false;
5008 gcc_assert (gimple_assign_single_p (stmt));
5009 op = gimple_assign_rhs1 (stmt);
5010 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5011 &def, &dt))
5013 if (dump_enabled_p ())
5014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5015 "use not simple.\n");
5016 return false;
5019 elem_type = TREE_TYPE (vectype);
5020 vec_mode = TYPE_MODE (vectype);
5022 /* FORNOW. In some cases can vectorize even if data-type not supported
5023 (e.g. - array initialization with 0). */
5024 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5025 return false;
5027 if (!STMT_VINFO_DATA_REF (stmt_info))
5028 return false;
5030 negative =
5031 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5032 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5033 size_zero_node) < 0;
5034 if (negative && ncopies > 1)
5036 if (dump_enabled_p ())
5037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5038 "multiple types with negative step.\n");
5039 return false;
5042 if (negative)
5044 gcc_assert (!grouped_store);
5045 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5046 if (alignment_support_scheme != dr_aligned
5047 && alignment_support_scheme != dr_unaligned_supported)
5049 if (dump_enabled_p ())
5050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5051 "negative step but alignment required.\n");
5052 return false;
5054 if (dt != vect_constant_def
5055 && dt != vect_external_def
5056 && !perm_mask_for_reverse (vectype))
5058 if (dump_enabled_p ())
5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5060 "negative step and reversing not supported.\n");
5061 return false;
5065 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5067 grouped_store = true;
5068 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5069 if (!slp && !PURE_SLP_STMT (stmt_info))
5071 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5072 if (vect_store_lanes_supported (vectype, group_size))
5073 store_lanes_p = true;
5074 else if (!vect_grouped_store_supported (vectype, group_size))
5075 return false;
5078 if (first_stmt == stmt)
5080 /* STMT is the leader of the group. Check the operands of all the
5081 stmts of the group. */
5082 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5083 while (next_stmt)
5085 gcc_assert (gimple_assign_single_p (next_stmt));
5086 op = gimple_assign_rhs1 (next_stmt);
5087 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5088 &def_stmt, &def, &dt))
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5092 "use not simple.\n");
5093 return false;
5095 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5100 if (!vec_stmt) /* transformation not required. */
5102 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5103 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5104 NULL, NULL, NULL);
5105 return true;
5108 /** Transform. **/
5110 ensure_base_align (stmt_info, dr);
5112 if (grouped_store)
5114 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5115 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5117 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5119 /* FORNOW */
5120 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5122 /* We vectorize all the stmts of the interleaving group when we
5123 reach the last stmt in the group. */
5124 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5125 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5126 && !slp)
5128 *vec_stmt = NULL;
5129 return true;
5132 if (slp)
5134 grouped_store = false;
5135 /* VEC_NUM is the number of vect stmts to be created for this
5136 group. */
5137 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5138 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5139 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5140 op = gimple_assign_rhs1 (first_stmt);
5142 else
5143 /* VEC_NUM is the number of vect stmts to be created for this
5144 group. */
5145 vec_num = group_size;
5147 else
5149 first_stmt = stmt;
5150 first_dr = dr;
5151 group_size = vec_num = 1;
5154 if (dump_enabled_p ())
5155 dump_printf_loc (MSG_NOTE, vect_location,
5156 "transform store. ncopies = %d\n", ncopies);
5158 dr_chain.create (group_size);
5159 oprnds.create (group_size);
5161 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5162 gcc_assert (alignment_support_scheme);
5163 /* Targets with store-lane instructions must not require explicit
5164 realignment. */
5165 gcc_assert (!store_lanes_p
5166 || alignment_support_scheme == dr_aligned
5167 || alignment_support_scheme == dr_unaligned_supported);
5169 if (negative)
5170 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5172 if (store_lanes_p)
5173 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5174 else
5175 aggr_type = vectype;
5177 /* In case the vectorization factor (VF) is bigger than the number
5178 of elements that we can fit in a vectype (nunits), we have to generate
5179 more than one vector stmt - i.e., we need to "unroll" the
5180 vector stmt by a factor VF/nunits. For more details see documentation in
5181 vect_get_vec_def_for_copy_stmt. */
5183 /* In case of interleaving (non-unit grouped access):
5185 S1: &base + 2 = x2
5186 S2: &base = x0
5187 S3: &base + 1 = x1
5188 S4: &base + 3 = x3
5190 We create vectorized stores starting from base address (the access of the
5191 first stmt in the chain (S2 in the above example), when the last store stmt
5192 of the chain (S4) is reached:
5194 VS1: &base = vx2
5195 VS2: &base + vec_size*1 = vx0
5196 VS3: &base + vec_size*2 = vx1
5197 VS4: &base + vec_size*3 = vx3
5199 Then permutation statements are generated:
5201 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5202 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5205 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5206 (the order of the data-refs in the output of vect_permute_store_chain
5207 corresponds to the order of scalar stmts in the interleaving chain - see
5208 the documentation of vect_permute_store_chain()).
5210 In case of both multiple types and interleaving, above vector stores and
5211 permutation stmts are created for every copy. The result vector stmts are
5212 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5213 STMT_VINFO_RELATED_STMT for the next copies.
5216 prev_stmt_info = NULL;
5217 for (j = 0; j < ncopies; j++)
5219 gimple new_stmt;
5221 if (j == 0)
5223 if (slp)
5225 /* Get vectorized arguments for SLP_NODE. */
5226 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5227 NULL, slp_node, -1);
5229 vec_oprnd = vec_oprnds[0];
5231 else
5233 /* For interleaved stores we collect vectorized defs for all the
5234 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5235 used as an input to vect_permute_store_chain(), and OPRNDS as
5236 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5238 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5239 OPRNDS are of size 1. */
5240 next_stmt = first_stmt;
5241 for (i = 0; i < group_size; i++)
5243 /* Since gaps are not supported for interleaved stores,
5244 GROUP_SIZE is the exact number of stmts in the chain.
5245 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5246 there is no interleaving, GROUP_SIZE is 1, and only one
5247 iteration of the loop will be executed. */
5248 gcc_assert (next_stmt
5249 && gimple_assign_single_p (next_stmt));
5250 op = gimple_assign_rhs1 (next_stmt);
5252 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5253 NULL);
5254 dr_chain.quick_push (vec_oprnd);
5255 oprnds.quick_push (vec_oprnd);
5256 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5260 /* We should have caught mismatched types earlier. */
5261 gcc_assert (useless_type_conversion_p (vectype,
5262 TREE_TYPE (vec_oprnd)));
5263 bool simd_lane_access_p
5264 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5265 if (simd_lane_access_p
5266 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5267 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5268 && integer_zerop (DR_OFFSET (first_dr))
5269 && integer_zerop (DR_INIT (first_dr))
5270 && alias_sets_conflict_p (get_alias_set (aggr_type),
5271 get_alias_set (DR_REF (first_dr))))
5273 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5274 dataref_offset = build_int_cst (reference_alias_ptr_type
5275 (DR_REF (first_dr)), 0);
5276 inv_p = false;
5278 else
5279 dataref_ptr
5280 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5281 simd_lane_access_p ? loop : NULL,
5282 offset, &dummy, gsi, &ptr_incr,
5283 simd_lane_access_p, &inv_p);
5284 gcc_assert (bb_vinfo || !inv_p);
5286 else
5288 /* For interleaved stores we created vectorized defs for all the
5289 defs stored in OPRNDS in the previous iteration (previous copy).
5290 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5291 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5292 next copy.
5293 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5294 OPRNDS are of size 1. */
5295 for (i = 0; i < group_size; i++)
5297 op = oprnds[i];
5298 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5299 &def, &dt);
5300 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5301 dr_chain[i] = vec_oprnd;
5302 oprnds[i] = vec_oprnd;
5304 if (dataref_offset)
5305 dataref_offset
5306 = int_const_binop (PLUS_EXPR, dataref_offset,
5307 TYPE_SIZE_UNIT (aggr_type));
5308 else
5309 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5310 TYPE_SIZE_UNIT (aggr_type));
5313 if (store_lanes_p)
5315 tree vec_array;
5317 /* Combine all the vectors into an array. */
5318 vec_array = create_vector_array (vectype, vec_num);
5319 for (i = 0; i < vec_num; i++)
5321 vec_oprnd = dr_chain[i];
5322 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5325 /* Emit:
5326 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5327 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5328 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5329 gimple_call_set_lhs (new_stmt, data_ref);
5330 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5332 else
5334 new_stmt = NULL;
5335 if (grouped_store)
5337 if (j == 0)
5338 result_chain.create (group_size);
5339 /* Permute. */
5340 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5341 &result_chain);
5344 next_stmt = first_stmt;
5345 for (i = 0; i < vec_num; i++)
5347 unsigned align, misalign;
5349 if (i > 0)
5350 /* Bump the vector pointer. */
5351 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5352 stmt, NULL_TREE);
5354 if (slp)
5355 vec_oprnd = vec_oprnds[i];
5356 else if (grouped_store)
5357 /* For grouped stores vectorized defs are interleaved in
5358 vect_permute_store_chain(). */
5359 vec_oprnd = result_chain[i];
5361 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5362 dataref_offset
5363 ? dataref_offset
5364 : build_int_cst (reference_alias_ptr_type
5365 (DR_REF (first_dr)), 0));
5366 align = TYPE_ALIGN_UNIT (vectype);
5367 if (aligned_access_p (first_dr))
5368 misalign = 0;
5369 else if (DR_MISALIGNMENT (first_dr) == -1)
5371 TREE_TYPE (data_ref)
5372 = build_aligned_type (TREE_TYPE (data_ref),
5373 TYPE_ALIGN (elem_type));
5374 align = TYPE_ALIGN_UNIT (elem_type);
5375 misalign = 0;
5377 else
5379 TREE_TYPE (data_ref)
5380 = build_aligned_type (TREE_TYPE (data_ref),
5381 TYPE_ALIGN (elem_type));
5382 misalign = DR_MISALIGNMENT (first_dr);
5384 if (dataref_offset == NULL_TREE)
5385 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5386 misalign);
5388 if (negative
5389 && dt != vect_constant_def
5390 && dt != vect_external_def)
5392 tree perm_mask = perm_mask_for_reverse (vectype);
5393 tree perm_dest
5394 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5395 vectype);
5396 tree new_temp = make_ssa_name (perm_dest, NULL);
5398 /* Generate the permute statement. */
5399 gimple perm_stmt
5400 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5401 vec_oprnd, vec_oprnd,
5402 perm_mask);
5403 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5405 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5406 vec_oprnd = new_temp;
5409 /* Arguments are ready. Create the new vector stmt. */
5410 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5413 if (slp)
5414 continue;
5416 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5417 if (!next_stmt)
5418 break;
5421 if (!slp)
5423 if (j == 0)
5424 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5425 else
5426 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5427 prev_stmt_info = vinfo_for_stmt (new_stmt);
5431 dr_chain.release ();
5432 oprnds.release ();
5433 result_chain.release ();
5434 vec_oprnds.release ();
5436 return true;
5439 /* Given a vector type VECTYPE and permutation SEL returns
5440 the VECTOR_CST mask that implements the permutation of the
5441 vector elements. If that is impossible to do, returns NULL. */
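/* E.g. for a 4-element vector type and SEL = { 3, 2, 1, 0 } the result is
   the VECTOR_CST { 3, 2, 1, 0 } of the matching integer vector type, or
   NULL_TREE if the target cannot perform the permutation
   (see can_vec_perm_p).  */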
5443 tree
5444 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5446 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5447 int i, nunits;
5449 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5451 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5452 return NULL;
5454 mask_elt_type = lang_hooks.types.type_for_mode
5455 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5456 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5458 mask_elts = XALLOCAVEC (tree, nunits);
5459 for (i = nunits - 1; i >= 0; i--)
5460 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5461 mask_vec = build_vector (mask_type, mask_elts);
5463 return mask_vec;
5466 /* Given vector variables X and Y that were generated for the scalar
5467 STMT, generate instructions to permute the vector elements of X and Y
5468 using permutation mask MASK_VEC, insert them at *GSI and return the
5469 permuted vector variable. */
5471 static tree
5472 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5473 gimple_stmt_iterator *gsi)
5475 tree vectype = TREE_TYPE (x);
5476 tree perm_dest, data_ref;
5477 gimple perm_stmt;
5479 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5480 data_ref = make_ssa_name (perm_dest, NULL);
5482 /* Generate the permute statement. */
5483 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5484 x, y, mask_vec);
5485 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5487 return data_ref;
5490 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5491 inserting them on the loop's preheader edge. Returns true if we
5492 were successful in doing so (and thus STMT can be moved then),
5493 otherwise returns false. */
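/* E.g. if STMT is an invariant load "x = *p_5" and "p_5 = &a + 16" is
   defined inside LOOP from loop-invariant operands, that definition is
   moved to the preheader so the load itself can be hoisted afterwards;
   PHI definitions or deeper use webs make us give up.  */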
5495 static bool
5496 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5498 ssa_op_iter i;
5499 tree op;
5500 bool any = false;
5502 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5504 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5505 if (!gimple_nop_p (def_stmt)
5506 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5508 /* Make sure we don't need to recurse. While we could do
5509 so in simple cases, when there are more complex use webs
5510 we don't have an easy way to preserve stmt order to fulfil
5511 dependencies within them. */
5512 tree op2;
5513 ssa_op_iter i2;
5514 if (gimple_code (def_stmt) == GIMPLE_PHI)
5515 return false;
5516 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5518 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5519 if (!gimple_nop_p (def_stmt2)
5520 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5521 return false;
5523 any = true;
5527 if (!any)
5528 return true;
5530 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5532 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5533 if (!gimple_nop_p (def_stmt)
5534 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5536 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5537 gsi_remove (&gsi, false);
5538 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5542 return true;
5545 /* vectorizable_load.
5547 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5548 can be vectorized.
5549 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5550 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5551 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5553 static bool
5554 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5555 slp_tree slp_node, slp_instance slp_node_instance)
5557 tree scalar_dest;
5558 tree vec_dest = NULL;
5559 tree data_ref = NULL;
5560 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5561 stmt_vec_info prev_stmt_info;
5562 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5563 struct loop *loop = NULL;
5564 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5565 bool nested_in_vect_loop = false;
5566 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5567 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5568 tree elem_type;
5569 tree new_temp;
5570 enum machine_mode mode;
5571 gimple new_stmt = NULL;
5572 tree dummy;
5573 enum dr_alignment_support alignment_support_scheme;
5574 tree dataref_ptr = NULL_TREE;
5575 tree dataref_offset = NULL_TREE;
5576 gimple ptr_incr = NULL;
5577 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5578 int ncopies;
5579 int i, j, group_size, group_gap;
5580 tree msq = NULL_TREE, lsq;
5581 tree offset = NULL_TREE;
5582 tree realignment_token = NULL_TREE;
5583 gimple phi = NULL;
5584 vec<tree> dr_chain = vNULL;
5585 bool grouped_load = false;
5586 bool load_lanes_p = false;
5587 gimple first_stmt;
5588 bool inv_p;
5589 bool negative = false;
5590 bool compute_in_loop = false;
5591 struct loop *at_loop;
5592 int vec_num;
5593 bool slp = (slp_node != NULL);
5594 bool slp_perm = false;
5595 enum tree_code code;
5596 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5597 int vf;
5598 tree aggr_type;
5599 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5600 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5601 int gather_scale = 1;
5602 enum vect_def_type gather_dt = vect_unknown_def_type;
5604 if (loop_vinfo)
5606 loop = LOOP_VINFO_LOOP (loop_vinfo);
5607 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5608 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5610 else
5611 vf = 1;
5613 /* Multiple types in SLP are handled by creating the appropriate number of
5614 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5615 case of SLP. */
5616 if (slp || PURE_SLP_STMT (stmt_info))
5617 ncopies = 1;
5618 else
5619 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5621 gcc_assert (ncopies >= 1);
5623 /* FORNOW. This restriction should be relaxed. */
5624 if (nested_in_vect_loop && ncopies > 1)
5626 if (dump_enabled_p ())
5627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5628 "multiple types in nested loop.\n");
5629 return false;
5632 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5633 return false;
5635 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5636 return false;
5638 /* Is vectorizable load? */
5639 if (!is_gimple_assign (stmt))
5640 return false;
5642 scalar_dest = gimple_assign_lhs (stmt);
5643 if (TREE_CODE (scalar_dest) != SSA_NAME)
5644 return false;
5646 code = gimple_assign_rhs_code (stmt);
5647 if (code != ARRAY_REF
5648 && code != BIT_FIELD_REF
5649 && code != INDIRECT_REF
5650 && code != COMPONENT_REF
5651 && code != IMAGPART_EXPR
5652 && code != REALPART_EXPR
5653 && code != MEM_REF
5654 && TREE_CODE_CLASS (code) != tcc_declaration)
5655 return false;
5657 if (!STMT_VINFO_DATA_REF (stmt_info))
5658 return false;
5660 elem_type = TREE_TYPE (vectype);
5661 mode = TYPE_MODE (vectype);
5663 /* FORNOW. In some cases can vectorize even if data-type not supported
5664 (e.g. - data copies). */
5665 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5669 "Aligned load, but unsupported type.\n");
5670 return false;
5673 /* Check if the load is a part of an interleaving chain. */
5674 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5676 grouped_load = true;
5677 /* FORNOW */
5678 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5680 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5681 if (!slp && !PURE_SLP_STMT (stmt_info))
5683 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5684 if (vect_load_lanes_supported (vectype, group_size))
5685 load_lanes_p = true;
5686 else if (!vect_grouped_load_supported (vectype, group_size))
5687 return false;
5692 if (STMT_VINFO_GATHER_P (stmt_info))
5694 gimple def_stmt;
5695 tree def;
5696 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5697 &gather_off, &gather_scale);
5698 gcc_assert (gather_decl);
5699 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5700 &def_stmt, &def, &gather_dt,
5701 &gather_off_vectype))
5703 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5705 "gather index use not simple.\n");
5706 return false;
5709 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5711 else
5713 negative = tree_int_cst_compare (nested_in_vect_loop
5714 ? STMT_VINFO_DR_STEP (stmt_info)
5715 : DR_STEP (dr),
5716 size_zero_node) < 0;
5717 if (negative && ncopies > 1)
5719 if (dump_enabled_p ())
5720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5721 "multiple types with negative step.\n");
5722 return false;
5725 if (negative)
5727 if (grouped_load)
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5731 "negative step for group load not supported"
5732 "\n");
5733 return false;
5735 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5736 if (alignment_support_scheme != dr_aligned
5737 && alignment_support_scheme != dr_unaligned_supported)
5739 if (dump_enabled_p ())
5740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5741 "negative step but alignment required.\n");
5742 return false;
5744 if (!perm_mask_for_reverse (vectype))
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5748 "negative step and reversing not supported."
5749 "\n");
5750 return false;
5755 if (!vec_stmt) /* transformation not required. */
5757 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5758 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5759 return true;
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_NOTE, vect_location,
5764 "transform load. ncopies = %d\n", ncopies);
5766 /** Transform. **/
5768 ensure_base_align (stmt_info, dr);
5770 if (STMT_VINFO_GATHER_P (stmt_info))
5772 tree vec_oprnd0 = NULL_TREE, op;
5773 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5774 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5775 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5776 edge pe = loop_preheader_edge (loop);
5777 gimple_seq seq;
5778 basic_block new_bb;
5779 enum { NARROW, NONE, WIDEN } modifier;
5780 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
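 /* Illustrative note (not from the original sources): when the data
    vectype and the gather offset vectype have the same number of lanes,
    no adjustment is needed (NONE).  If the offset vector has twice as
    many lanes (e.g. V4DF data gathered with V8SI offsets), each offset
    vector feeds two gather calls and the odd copies use its upper half,
    selected by the permutation built below (WIDEN).  If the data vector
    has twice as many lanes (e.g. V8SF data with V4DI offsets), two
    gather results are combined into one data vector by a permutation
    and ncopies is doubled (NARROW).  */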
5782 if (nunits == gather_off_nunits)
5783 modifier = NONE;
5784 else if (nunits == gather_off_nunits / 2)
5786 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5787 modifier = WIDEN;
5789 for (i = 0; i < gather_off_nunits; ++i)
5790 sel[i] = i | nunits;
5792 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5793 gcc_assert (perm_mask != NULL_TREE);
5795 else if (nunits == gather_off_nunits * 2)
5797 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5798 modifier = NARROW;
5800 for (i = 0; i < nunits; ++i)
5801 sel[i] = i < gather_off_nunits
5802 ? i : i + nunits - gather_off_nunits;
5804 perm_mask = vect_gen_perm_mask (vectype, sel);
5805 gcc_assert (perm_mask != NULL_TREE);
5806 ncopies *= 2;
5808 else
5809 gcc_unreachable ();
5811 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5812 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5813 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5814 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5815 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5816 scaletype = TREE_VALUE (arglist);
5817 gcc_checking_assert (types_compatible_p (srctype, rettype));
5819 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5821 ptr = fold_convert (ptrtype, gather_base);
5822 if (!is_gimple_min_invariant (ptr))
5824 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5825 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5826 gcc_assert (!new_bb);
5829 /* Currently we support only unconditional gather loads,
5830 so mask should be all ones. */
5831 if (TREE_CODE (masktype) == INTEGER_TYPE)
5832 mask = build_int_cst (masktype, -1);
5833 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5835 mask = build_int_cst (TREE_TYPE (masktype), -1);
5836 mask = build_vector_from_val (masktype, mask);
5837 mask = vect_init_vector (stmt, mask, masktype, NULL);
5839 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
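 /* Added note: for a floating-point mask element, build a REAL_CST
    whose target bit pattern is all ones; the numeric value itself is
    not what matters, only that every mask bit is set.  */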
5841 REAL_VALUE_TYPE r;
5842 long tmp[6];
5843 for (j = 0; j < 6; ++j)
5844 tmp[j] = -1;
5845 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5846 mask = build_real (TREE_TYPE (masktype), r);
5847 mask = build_vector_from_val (masktype, mask);
5848 mask = vect_init_vector (stmt, mask, masktype, NULL);
5850 else
5851 gcc_unreachable ();
5853 scale = build_int_cst (scaletype, gather_scale);
5855 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5856 merge = build_int_cst (TREE_TYPE (rettype), 0);
5857 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5859 REAL_VALUE_TYPE r;
5860 long tmp[6];
5861 for (j = 0; j < 6; ++j)
5862 tmp[j] = 0;
5863 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5864 merge = build_real (TREE_TYPE (rettype), r);
5866 else
5867 gcc_unreachable ();
5868 merge = build_vector_from_val (rettype, merge);
5869 merge = vect_init_vector (stmt, merge, rettype, NULL);
5871 prev_stmt_info = NULL;
5872 for (j = 0; j < ncopies; ++j)
5874 if (modifier == WIDEN && (j & 1))
5875 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5876 perm_mask, stmt, gsi);
5877 else if (j == 0)
5878 op = vec_oprnd0
5879 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5880 else
5881 op = vec_oprnd0
5882 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5884 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5886 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5887 == TYPE_VECTOR_SUBPARTS (idxtype));
5888 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5889 var = make_ssa_name (var, NULL);
5890 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5891 new_stmt
5892 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5893 op, NULL_TREE);
5894 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5895 op = var;
5898 new_stmt
5899 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5901 if (!useless_type_conversion_p (vectype, rettype))
5903 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5904 == TYPE_VECTOR_SUBPARTS (rettype));
5905 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5906 op = make_ssa_name (var, new_stmt);
5907 gimple_call_set_lhs (new_stmt, op);
5908 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5909 var = make_ssa_name (vec_dest, NULL);
5910 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5911 new_stmt
5912 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5913 NULL_TREE);
5915 else
5917 var = make_ssa_name (vec_dest, new_stmt);
5918 gimple_call_set_lhs (new_stmt, var);
5921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5923 if (modifier == NARROW)
5925 if ((j & 1) == 0)
5927 prev_res = var;
5928 continue;
5930 var = permute_vec_elements (prev_res, var,
5931 perm_mask, stmt, gsi);
5932 new_stmt = SSA_NAME_DEF_STMT (var);
5935 if (prev_stmt_info == NULL)
5936 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5937 else
5938 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5939 prev_stmt_info = vinfo_for_stmt (new_stmt);
5941 return true;
5943 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5945 gimple_stmt_iterator incr_gsi;
5946 bool insert_after;
5947 gimple incr;
5948 tree offvar;
5949 tree ivstep;
5950 tree running_off;
5951 vec<constructor_elt, va_gc> *v = NULL;
5952 gimple_seq stmts = NULL;
5953 tree stride_base, stride_step, alias_off;
5955 gcc_assert (!nested_in_vect_loop);
5957 stride_base
5958 = fold_build_pointer_plus
5959 (unshare_expr (DR_BASE_ADDRESS (dr)),
5960 size_binop (PLUS_EXPR,
5961 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5962 convert_to_ptrofftype (DR_INIT (dr))));
5963 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5965 /* For a load with loop-invariant (but other than power-of-2)
5966 stride (i.e. not a grouped access) like so:
5968 for (i = 0; i < n; i += stride)
5969 ... = array[i];
5971 we generate a new induction variable and new accesses to
5972 form a new vector (or vectors, depending on ncopies):
5974 for (j = 0; ; j += VF*stride)
5975 tmp1 = array[j];
5976 tmp2 = array[j + stride];
5978 vectemp = {tmp1, tmp2, ...}
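 For instance (an added illustration, not part of the original
 comment): with a 4-element vectype each vector temporary is built
 from array[j], array[j + stride], array[j + 2*stride] and
 array[j + 3*stride], loaded as scalars and combined through a
 CONSTRUCTOR.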
5981 ivstep = stride_step;
5982 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5983 build_int_cst (TREE_TYPE (ivstep), vf));
5985 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5987 create_iv (stride_base, ivstep, NULL,
5988 loop, &incr_gsi, insert_after,
5989 &offvar, NULL);
5990 incr = gsi_stmt (incr_gsi);
5991 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5993 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5994 if (stmts)
5995 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5997 prev_stmt_info = NULL;
5998 running_off = offvar;
5999 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6000 for (j = 0; j < ncopies; j++)
6002 tree vec_inv;
6004 vec_alloc (v, nunits);
6005 for (i = 0; i < nunits; i++)
6007 tree newref, newoff;
6008 gimple incr;
6009 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6010 running_off, alias_off);
6012 newref = force_gimple_operand_gsi (gsi, newref, true,
6013 NULL_TREE, true,
6014 GSI_SAME_STMT);
6015 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6016 newoff = copy_ssa_name (running_off, NULL);
6017 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6018 running_off, stride_step);
6019 vect_finish_stmt_generation (stmt, incr, gsi);
6021 running_off = newoff;
6024 vec_inv = build_constructor (vectype, v);
6025 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6026 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6028 if (j == 0)
6029 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6030 else
6031 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6032 prev_stmt_info = vinfo_for_stmt (new_stmt);
6034 return true;
6037 if (grouped_load)
6039 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6040 if (slp
6041 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6042 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6043 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6045 /* Check if the chain of loads is already vectorized. */
6046 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6047 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6048 ??? But we can only do so if there is exactly one
6049 as we have no way to get at the rest. Leave the CSE
6050 opportunity alone.
6051 ??? With the group load eventually participating
6052 in multiple different permutations (having multiple
6053 slp nodes which refer to the same group) the CSE
6054 is even wrong code. See PR56270. */
6055 && !slp)
6057 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6058 return true;
6060 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6061 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6063 /* VEC_NUM is the number of vect stmts to be created for this group. */
6064 if (slp)
6066 grouped_load = false;
6067 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6068 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6069 slp_perm = true;
6070 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6072 else
6074 vec_num = group_size;
6075 group_gap = 0;
6078 else
6080 first_stmt = stmt;
6081 first_dr = dr;
6082 group_size = vec_num = 1;
6083 group_gap = 0;
6086 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6087 gcc_assert (alignment_support_scheme);
6088 /* Targets with load-lane instructions must not require explicit
6089 realignment. */
6090 gcc_assert (!load_lanes_p
6091 || alignment_support_scheme == dr_aligned
6092 || alignment_support_scheme == dr_unaligned_supported);
6094 /* In case the vectorization factor (VF) is bigger than the number
6095 of elements that we can fit in a vectype (nunits), we have to generate
6096 more than one vector stmt - i.e - we need to "unroll" the
6097 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6098 from one copy of the vector stmt to the next, in the field
6099 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6100 stages to find the correct vector defs to be used when vectorizing
6101 stmts that use the defs of the current stmt. The example below
6102 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6103 need to create 4 vectorized stmts):
6105 before vectorization:
6106 RELATED_STMT VEC_STMT
6107 S1: x = memref - -
6108 S2: z = x + 1 - -
6110 step 1: vectorize stmt S1:
6111 We first create the vector stmt VS1_0, and, as usual, record a
6112 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6113 Next, we create the vector stmt VS1_1, and record a pointer to
6114 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6115 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6116 stmts and pointers:
6117 RELATED_STMT VEC_STMT
6118 VS1_0: vx0 = memref0 VS1_1 -
6119 VS1_1: vx1 = memref1 VS1_2 -
6120 VS1_2: vx2 = memref2 VS1_3 -
6121 VS1_3: vx3 = memref3 - -
6122 S1: x = load - VS1_0
6123 S2: z = x + 1 - -
6125 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6126 information recorded in the RELATED_STMT field is used to vectorize
6127 stmt S2. */
6129 /* In case of interleaving (non-unit grouped access):
6131 S1: x2 = &base + 2
6132 S2: x0 = &base
6133 S3: x1 = &base + 1
6134 S4: x3 = &base + 3
6136 Vectorized loads are created in the order of memory accesses
6137 starting from the access of the first stmt of the chain:
6139 VS1: vx0 = &base
6140 VS2: vx1 = &base + vec_size*1
6141 VS3: vx3 = &base + vec_size*2
6142 VS4: vx4 = &base + vec_size*3
6144 Then permutation statements are generated:
6146 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6147 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6150 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6151 (the order of the data-refs in the output of vect_permute_load_chain
6152 corresponds to the order of scalar stmts in the interleaving chain - see
6153 the documentation of vect_permute_load_chain()).
6154 The generation of permutation stmts and recording them in
6155 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6157 In case of both multiple types and interleaving, the vector loads and
6158 permutation stmts above are created for every copy. The result vector
6159 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6160 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6162 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6163 on a target that supports unaligned accesses (dr_unaligned_supported)
6164 we generate the following code:
6165 p = initial_addr;
6166 indx = 0;
6167 loop {
6168 p = p + indx * vectype_size;
6169 vec_dest = *(p);
6170 indx = indx + 1;
6173 Otherwise, the data reference is potentially unaligned on a target that
6174 does not support unaligned accesses (dr_explicit_realign_optimized) -
6175 then generate the following code, in which the data in each iteration is
6176 obtained by two vector loads, one from the previous iteration, and one
6177 from the current iteration:
6178 p1 = initial_addr;
6179 msq_init = *(floor(p1))
6180 p2 = initial_addr + VS - 1;
6181 realignment_token = call target_builtin;
6182 indx = 0;
6183 loop {
6184 p2 = p2 + indx * vectype_size
6185 lsq = *(floor(p2))
6186 vec_dest = realign_load (msq, lsq, realignment_token)
6187 indx = indx + 1;
6188 msq = lsq;
6189 } */
6191 /* If the misalignment remains the same throughout the execution of the
6192 loop, we can create the init_addr and permutation mask at the loop
6193 preheader. Otherwise, it needs to be created inside the loop.
6194 This can only occur when vectorizing memory accesses in the inner-loop
6195 nested within an outer-loop that is being vectorized. */
6197 if (nested_in_vect_loop
6198 && (TREE_INT_CST_LOW (DR_STEP (dr))
6199 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6201 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6202 compute_in_loop = true;
6205 if ((alignment_support_scheme == dr_explicit_realign_optimized
6206 || alignment_support_scheme == dr_explicit_realign)
6207 && !compute_in_loop)
6209 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6210 alignment_support_scheme, NULL_TREE,
6211 &at_loop);
6212 if (alignment_support_scheme == dr_explicit_realign_optimized)
6214 phi = SSA_NAME_DEF_STMT (msq);
6215 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6218 else
6219 at_loop = loop;
6221 if (negative)
6222 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
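 /* Added note: for LOAD_LANES the data-ref is accessed through an
    array covering the whole group, e.g. (illustration only) a group
    of two V4SI loads uses an 8-element array of ints; otherwise each
    access is a single vector.  */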
6224 if (load_lanes_p)
6225 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6226 else
6227 aggr_type = vectype;
6229 prev_stmt_info = NULL;
6230 for (j = 0; j < ncopies; j++)
6232 /* 1. Create the vector or array pointer update chain. */
6233 if (j == 0)
6235 bool simd_lane_access_p
6236 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6237 if (simd_lane_access_p
6238 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6239 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6240 && integer_zerop (DR_OFFSET (first_dr))
6241 && integer_zerop (DR_INIT (first_dr))
6242 && alias_sets_conflict_p (get_alias_set (aggr_type),
6243 get_alias_set (DR_REF (first_dr)))
6244 && (alignment_support_scheme == dr_aligned
6245 || alignment_support_scheme == dr_unaligned_supported))
6247 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6248 dataref_offset = build_int_cst (reference_alias_ptr_type
6249 (DR_REF (first_dr)), 0);
6250 inv_p = false;
6252 else
6253 dataref_ptr
6254 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6255 offset, &dummy, gsi, &ptr_incr,
6256 simd_lane_access_p, &inv_p);
6258 else if (dataref_offset)
6259 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6260 TYPE_SIZE_UNIT (aggr_type));
6261 else
6262 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6263 TYPE_SIZE_UNIT (aggr_type));
6265 if (grouped_load || slp_perm)
6266 dr_chain.create (vec_num);
6268 if (load_lanes_p)
6270 tree vec_array;
6272 vec_array = create_vector_array (vectype, vec_num);
6274 /* Emit:
6275 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6276 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6277 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6278 gimple_call_set_lhs (new_stmt, vec_array);
6279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6281 /* Extract each vector into an SSA_NAME. */
6282 for (i = 0; i < vec_num; i++)
6284 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6285 vec_array, i);
6286 dr_chain.quick_push (new_temp);
6289 /* Record the mapping between SSA_NAMEs and statements. */
6290 vect_record_grouped_load_vectors (stmt, dr_chain);
6292 else
6294 for (i = 0; i < vec_num; i++)
6296 if (i > 0)
6297 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6298 stmt, NULL_TREE);
6300 /* 2. Create the vector-load in the loop. */
6301 switch (alignment_support_scheme)
6303 case dr_aligned:
6304 case dr_unaligned_supported:
6306 unsigned int align, misalign;
6308 data_ref
6309 = build2 (MEM_REF, vectype, dataref_ptr,
6310 dataref_offset
6311 ? dataref_offset
6312 : build_int_cst (reference_alias_ptr_type
6313 (DR_REF (first_dr)), 0));
6314 align = TYPE_ALIGN_UNIT (vectype);
6315 if (alignment_support_scheme == dr_aligned)
6317 gcc_assert (aligned_access_p (first_dr));
6318 misalign = 0;
6320 else if (DR_MISALIGNMENT (first_dr) == -1)
6322 TREE_TYPE (data_ref)
6323 = build_aligned_type (TREE_TYPE (data_ref),
6324 TYPE_ALIGN (elem_type));
6325 align = TYPE_ALIGN_UNIT (elem_type);
6326 misalign = 0;
6328 else
6330 TREE_TYPE (data_ref)
6331 = build_aligned_type (TREE_TYPE (data_ref),
6332 TYPE_ALIGN (elem_type));
6333 misalign = DR_MISALIGNMENT (first_dr);
6335 if (dataref_offset == NULL_TREE)
6336 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6337 align, misalign);
6338 break;
6340 case dr_explicit_realign:
6342 tree ptr, bump;
6343 tree vs_minus_1;
6345 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6347 if (compute_in_loop)
6348 msq = vect_setup_realignment (first_stmt, gsi,
6349 &realignment_token,
6350 dr_explicit_realign,
6351 dataref_ptr, NULL);
6353 ptr = copy_ssa_name (dataref_ptr, NULL);
6354 new_stmt = gimple_build_assign_with_ops
6355 (BIT_AND_EXPR, ptr, dataref_ptr,
6356 build_int_cst
6357 (TREE_TYPE (dataref_ptr),
6358 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6359 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6360 data_ref
6361 = build2 (MEM_REF, vectype, ptr,
6362 build_int_cst (reference_alias_ptr_type
6363 (DR_REF (first_dr)), 0));
6364 vec_dest = vect_create_destination_var (scalar_dest,
6365 vectype);
6366 new_stmt = gimple_build_assign (vec_dest, data_ref);
6367 new_temp = make_ssa_name (vec_dest, new_stmt);
6368 gimple_assign_set_lhs (new_stmt, new_temp);
6369 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6370 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6371 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6372 msq = new_temp;
6374 bump = size_binop (MULT_EXPR, vs_minus_1,
6375 TYPE_SIZE_UNIT (elem_type));
6376 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6377 new_stmt = gimple_build_assign_with_ops
6378 (BIT_AND_EXPR, NULL_TREE, ptr,
6379 build_int_cst
6380 (TREE_TYPE (ptr),
6381 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6382 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6383 gimple_assign_set_lhs (new_stmt, ptr);
6384 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6385 data_ref
6386 = build2 (MEM_REF, vectype, ptr,
6387 build_int_cst (reference_alias_ptr_type
6388 (DR_REF (first_dr)), 0));
6389 break;
6391 case dr_explicit_realign_optimized:
6392 new_temp = copy_ssa_name (dataref_ptr, NULL);
6393 new_stmt = gimple_build_assign_with_ops
6394 (BIT_AND_EXPR, new_temp, dataref_ptr,
6395 build_int_cst
6396 (TREE_TYPE (dataref_ptr),
6397 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6398 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6399 data_ref
6400 = build2 (MEM_REF, vectype, new_temp,
6401 build_int_cst (reference_alias_ptr_type
6402 (DR_REF (first_dr)), 0));
6403 break;
6404 default:
6405 gcc_unreachable ();
6407 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6408 new_stmt = gimple_build_assign (vec_dest, data_ref);
6409 new_temp = make_ssa_name (vec_dest, new_stmt);
6410 gimple_assign_set_lhs (new_stmt, new_temp);
6411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6413 /* 3. Handle explicit realignment if necessary/supported.
6414 Create in loop:
6415 vec_dest = realign_load (msq, lsq, realignment_token) */
6416 if (alignment_support_scheme == dr_explicit_realign_optimized
6417 || alignment_support_scheme == dr_explicit_realign)
6419 lsq = gimple_assign_lhs (new_stmt);
6420 if (!realignment_token)
6421 realignment_token = dataref_ptr;
6422 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6423 new_stmt
6424 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6425 vec_dest, msq, lsq,
6426 realignment_token);
6427 new_temp = make_ssa_name (vec_dest, new_stmt);
6428 gimple_assign_set_lhs (new_stmt, new_temp);
6429 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6431 if (alignment_support_scheme == dr_explicit_realign_optimized)
6433 gcc_assert (phi);
6434 if (i == vec_num - 1 && j == ncopies - 1)
6435 add_phi_arg (phi, lsq,
6436 loop_latch_edge (containing_loop),
6437 UNKNOWN_LOCATION);
6438 msq = lsq;
6442 /* 4. Handle invariant-load. */
6443 if (inv_p && !bb_vinfo)
6445 gcc_assert (!grouped_load);
6446 /* If we have versioned for aliasing or the loop doesn't
6447 have any data dependencies that would preclude this,
6448 then we are sure this is a loop invariant load and
6449 thus we can insert it on the preheader edge. */
6450 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6451 && !nested_in_vect_loop
6452 && hoist_defs_of_uses (stmt, loop))
6454 if (dump_enabled_p ())
6456 dump_printf_loc (MSG_NOTE, vect_location,
6457 "hoisting out of the vectorized "
6458 "loop: ");
6459 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6460 dump_printf (MSG_NOTE, "\n");
6462 tree tem = copy_ssa_name (scalar_dest, NULL);
6463 gsi_insert_on_edge_immediate
6464 (loop_preheader_edge (loop),
6465 gimple_build_assign (tem,
6466 unshare_expr
6467 (gimple_assign_rhs1 (stmt))));
6468 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6470 else
6472 gimple_stmt_iterator gsi2 = *gsi;
6473 gsi_next (&gsi2);
6474 new_temp = vect_init_vector (stmt, scalar_dest,
6475 vectype, &gsi2);
6477 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6478 set_vinfo_for_stmt (new_stmt,
6479 new_stmt_vec_info (new_stmt, loop_vinfo,
6480 bb_vinfo));
6483 if (negative)
6485 tree perm_mask = perm_mask_for_reverse (vectype);
6486 new_temp = permute_vec_elements (new_temp, new_temp,
6487 perm_mask, stmt, gsi);
6488 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6491 /* Collect vector loads and later create their permutation in
6492 vect_transform_grouped_load (). */
6493 if (grouped_load || slp_perm)
6494 dr_chain.quick_push (new_temp);
6496 /* Store vector loads in the corresponding SLP_NODE. */
6497 if (slp && !slp_perm)
6498 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6500 /* Bump the vector pointer to account for a gap. */
6501 if (slp && group_gap != 0)
6503 tree bump = size_binop (MULT_EXPR,
6504 TYPE_SIZE_UNIT (elem_type),
6505 size_int (group_gap));
6506 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6507 stmt, bump);
6511 if (slp && !slp_perm)
6512 continue;
6514 if (slp_perm)
6516 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6517 slp_node_instance, false))
6519 dr_chain.release ();
6520 return false;
6523 else
6525 if (grouped_load)
6527 if (!load_lanes_p)
6528 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6529 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6531 else
6533 if (j == 0)
6534 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6535 else
6536 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6537 prev_stmt_info = vinfo_for_stmt (new_stmt);
6540 dr_chain.release ();
6543 return true;
6546 /* Function vect_is_simple_cond.
6548 Input:
6549 LOOP_VINFO - the vect info of the loop that is being vectorized.
6550 COND - Condition that is checked for simple use.
6552 Output:
6553 *COMP_VECTYPE - the vector type for the comparison.
6555 Returns whether a COND can be vectorized. Checks whether
6556 condition operands are supportable using vect_is_simple_use. */
6558 static bool
6559 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6560 bb_vec_info bb_vinfo, tree *comp_vectype)
6562 tree lhs, rhs;
6563 tree def;
6564 enum vect_def_type dt;
6565 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6567 if (!COMPARISON_CLASS_P (cond))
6568 return false;
6570 lhs = TREE_OPERAND (cond, 0);
6571 rhs = TREE_OPERAND (cond, 1);
6573 if (TREE_CODE (lhs) == SSA_NAME)
6575 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6576 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6577 &lhs_def_stmt, &def, &dt, &vectype1))
6578 return false;
6580 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6581 && TREE_CODE (lhs) != FIXED_CST)
6582 return false;
6584 if (TREE_CODE (rhs) == SSA_NAME)
6586 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6587 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6588 &rhs_def_stmt, &def, &dt, &vectype2))
6589 return false;
6591 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6592 && TREE_CODE (rhs) != FIXED_CST)
6593 return false;
6595 *comp_vectype = vectype1 ? vectype1 : vectype2;
6596 return true;
6599 /* vectorizable_condition.
6601 Check if STMT is conditional modify expression that can be vectorized.
6602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6603 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6604 at GSI.
6606 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6607 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6608 else clause if it is 2).
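 For example (an added illustration): the scalar statement
 x = a < b ? c : d is vectorized as
 vx = VEC_COND_EXPR <va < vb, vc, vd>.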
6610 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6612 bool
6613 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6614 gimple *vec_stmt, tree reduc_def, int reduc_index,
6615 slp_tree slp_node)
6617 tree scalar_dest = NULL_TREE;
6618 tree vec_dest = NULL_TREE;
6619 tree cond_expr, then_clause, else_clause;
6620 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6621 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6622 tree comp_vectype = NULL_TREE;
6623 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6624 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6625 tree vec_compare, vec_cond_expr;
6626 tree new_temp;
6627 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6628 tree def;
6629 enum vect_def_type dt, dts[4];
6630 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6631 int ncopies;
6632 enum tree_code code;
6633 stmt_vec_info prev_stmt_info = NULL;
6634 int i, j;
6635 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6636 vec<tree> vec_oprnds0 = vNULL;
6637 vec<tree> vec_oprnds1 = vNULL;
6638 vec<tree> vec_oprnds2 = vNULL;
6639 vec<tree> vec_oprnds3 = vNULL;
6640 tree vec_cmp_type;
6642 if (slp_node || PURE_SLP_STMT (stmt_info))
6643 ncopies = 1;
6644 else
6645 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6647 gcc_assert (ncopies >= 1);
6648 if (reduc_index && ncopies > 1)
6649 return false; /* FORNOW */
6651 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6652 return false;
6654 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6655 return false;
6657 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6658 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6659 && reduc_def))
6660 return false;
6662 /* FORNOW: not yet supported. */
6663 if (STMT_VINFO_LIVE_P (stmt_info))
6665 if (dump_enabled_p ())
6666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6667 "value used after loop.\n");
6668 return false;
6671 /* Is vectorizable conditional operation? */
6672 if (!is_gimple_assign (stmt))
6673 return false;
6675 code = gimple_assign_rhs_code (stmt);
6677 if (code != COND_EXPR)
6678 return false;
6680 cond_expr = gimple_assign_rhs1 (stmt);
6681 then_clause = gimple_assign_rhs2 (stmt);
6682 else_clause = gimple_assign_rhs3 (stmt);
6684 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6685 &comp_vectype)
6686 || !comp_vectype)
6687 return false;
6689 if (TREE_CODE (then_clause) == SSA_NAME)
6691 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6692 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6693 &then_def_stmt, &def, &dt))
6694 return false;
6696 else if (TREE_CODE (then_clause) != INTEGER_CST
6697 && TREE_CODE (then_clause) != REAL_CST
6698 && TREE_CODE (then_clause) != FIXED_CST)
6699 return false;
6701 if (TREE_CODE (else_clause) == SSA_NAME)
6703 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6704 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6705 &else_def_stmt, &def, &dt))
6706 return false;
6708 else if (TREE_CODE (else_clause) != INTEGER_CST
6709 && TREE_CODE (else_clause) != REAL_CST
6710 && TREE_CODE (else_clause) != FIXED_CST)
6711 return false;
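 /* Added note: the comparison type built below is a signed integer
    vector with the same element width and lane count as VECTYPE,
    e.g. (illustration only) V4SI for a V4SF result.  */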
6713 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6714 /* The result of a vector comparison should be a signed type. */
6715 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6716 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6717 if (vec_cmp_type == NULL_TREE)
6718 return false;
6720 if (!vec_stmt)
6722 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6723 return expand_vec_cond_expr_p (vectype, comp_vectype);
6726 /* Transform. */
6728 if (!slp_node)
6730 vec_oprnds0.create (1);
6731 vec_oprnds1.create (1);
6732 vec_oprnds2.create (1);
6733 vec_oprnds3.create (1);
6736 /* Handle def. */
6737 scalar_dest = gimple_assign_lhs (stmt);
6738 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6740 /* Handle cond expr. */
6741 for (j = 0; j < ncopies; j++)
6743 gimple new_stmt = NULL;
6744 if (j == 0)
6746 if (slp_node)
6748 auto_vec<tree, 4> ops;
6749 auto_vec<vec<tree>, 4> vec_defs;
6751 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6752 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6753 ops.safe_push (then_clause);
6754 ops.safe_push (else_clause);
6755 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6756 vec_oprnds3 = vec_defs.pop ();
6757 vec_oprnds2 = vec_defs.pop ();
6758 vec_oprnds1 = vec_defs.pop ();
6759 vec_oprnds0 = vec_defs.pop ();
6761 ops.release ();
6762 vec_defs.release ();
6764 else
6766 gimple gtemp;
6767 vec_cond_lhs =
6768 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6769 stmt, NULL);
6770 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6771 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6773 vec_cond_rhs =
6774 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6775 stmt, NULL);
6776 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6777 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6778 if (reduc_index == 1)
6779 vec_then_clause = reduc_def;
6780 else
6782 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6783 stmt, NULL);
6784 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6785 NULL, &gtemp, &def, &dts[2]);
6787 if (reduc_index == 2)
6788 vec_else_clause = reduc_def;
6789 else
6791 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6792 stmt, NULL);
6793 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6794 NULL, &gtemp, &def, &dts[3]);
6798 else
6800 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6801 vec_oprnds0.pop ());
6802 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6803 vec_oprnds1.pop ());
6804 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6805 vec_oprnds2.pop ());
6806 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6807 vec_oprnds3.pop ());
6810 if (!slp_node)
6812 vec_oprnds0.quick_push (vec_cond_lhs);
6813 vec_oprnds1.quick_push (vec_cond_rhs);
6814 vec_oprnds2.quick_push (vec_then_clause);
6815 vec_oprnds3.quick_push (vec_else_clause);
6818 /* Arguments are ready. Create the new vector stmt. */
6819 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6821 vec_cond_rhs = vec_oprnds1[i];
6822 vec_then_clause = vec_oprnds2[i];
6823 vec_else_clause = vec_oprnds3[i];
6825 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6826 vec_cond_lhs, vec_cond_rhs);
6827 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6828 vec_compare, vec_then_clause, vec_else_clause);
6830 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6831 new_temp = make_ssa_name (vec_dest, new_stmt);
6832 gimple_assign_set_lhs (new_stmt, new_temp);
6833 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6834 if (slp_node)
6835 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6838 if (slp_node)
6839 continue;
6841 if (j == 0)
6842 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6843 else
6844 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6846 prev_stmt_info = vinfo_for_stmt (new_stmt);
6849 vec_oprnds0.release ();
6850 vec_oprnds1.release ();
6851 vec_oprnds2.release ();
6852 vec_oprnds3.release ();
6854 return true;
6858 /* Make sure the statement is vectorizable. */
6860 bool
6861 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6863 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6864 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6865 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6866 bool ok;
6867 tree scalar_type, vectype;
6868 gimple pattern_stmt;
6869 gimple_seq pattern_def_seq;
6871 if (dump_enabled_p ())
6873 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6874 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6875 dump_printf (MSG_NOTE, "\n");
6878 if (gimple_has_volatile_ops (stmt))
6880 if (dump_enabled_p ())
6881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6882 "not vectorized: stmt has volatile operands\n");
6884 return false;
6887 /* Skip stmts that do not need to be vectorized. In loops this is expected
6888 to include:
6889 - the COND_EXPR which is the loop exit condition
6890 - any LABEL_EXPRs in the loop
6891 - computations that are used only for array indexing or loop control.
6892 In basic blocks we only analyze statements that are a part of some SLP
6893 instance, therefore, all the statements are relevant.
6895 Pattern statement needs to be analyzed instead of the original statement
6896 if the original statement is not relevant. Otherwise, we analyze both
6897 statements. In basic blocks we are called from some SLP instance
6898 traversal; in that case we do not analyze pattern stmts instead of
6899 the original stmts, since the pattern stmts are already part of the SLP instance. */
6901 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6902 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6903 && !STMT_VINFO_LIVE_P (stmt_info))
6905 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6906 && pattern_stmt
6907 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6908 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6910 /* Analyze PATTERN_STMT instead of the original stmt. */
6911 stmt = pattern_stmt;
6912 stmt_info = vinfo_for_stmt (pattern_stmt);
6913 if (dump_enabled_p ())
6915 dump_printf_loc (MSG_NOTE, vect_location,
6916 "==> examining pattern statement: ");
6917 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6918 dump_printf (MSG_NOTE, "\n");
6921 else
6923 if (dump_enabled_p ())
6924 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6926 return true;
6929 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6930 && node == NULL
6931 && pattern_stmt
6932 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6933 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6935 /* Analyze PATTERN_STMT too. */
6936 if (dump_enabled_p ())
6938 dump_printf_loc (MSG_NOTE, vect_location,
6939 "==> examining pattern statement: ");
6940 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6941 dump_printf (MSG_NOTE, "\n");
6944 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
6945 return false;
6948 if (is_pattern_stmt_p (stmt_info)
6949 && node == NULL
6950 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
6952 gimple_stmt_iterator si;
6954 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
6956 gimple pattern_def_stmt = gsi_stmt (si);
6957 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
6958 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
6960 /* Analyze def stmt of STMT if it's a pattern stmt. */
6961 if (dump_enabled_p ())
6963 dump_printf_loc (MSG_NOTE, vect_location,
6964 "==> examining pattern def statement: ");
6965 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
6966 dump_printf (MSG_NOTE, "\n");
6969 if (!vect_analyze_stmt (pattern_def_stmt,
6970 need_to_vectorize, node))
6971 return false;
6976 switch (STMT_VINFO_DEF_TYPE (stmt_info))
6978 case vect_internal_def:
6979 break;
6981 case vect_reduction_def:
6982 case vect_nested_cycle:
6983 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
6984 || relevance == vect_used_in_outer_by_reduction
6985 || relevance == vect_unused_in_scope));
6986 break;
6988 case vect_induction_def:
6989 case vect_constant_def:
6990 case vect_external_def:
6991 case vect_unknown_def_type:
6992 default:
6993 gcc_unreachable ();
6996 if (bb_vinfo)
6998 gcc_assert (PURE_SLP_STMT (stmt_info));
7000 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7001 if (dump_enabled_p ())
7003 dump_printf_loc (MSG_NOTE, vect_location,
7004 "get vectype for scalar type: ");
7005 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7006 dump_printf (MSG_NOTE, "\n");
7009 vectype = get_vectype_for_scalar_type (scalar_type);
7010 if (!vectype)
7012 if (dump_enabled_p ())
7014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7015 "not SLPed: unsupported data-type ");
7016 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7017 scalar_type);
7018 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7020 return false;
7023 if (dump_enabled_p ())
7025 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7026 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7027 dump_printf (MSG_NOTE, "\n");
7030 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7033 if (STMT_VINFO_RELEVANT_P (stmt_info))
7035 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7036 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7037 || (is_gimple_call (stmt)
7038 && gimple_call_lhs (stmt) == NULL_TREE));
7039 *need_to_vectorize = true;
7042 ok = true;
7043 if (!bb_vinfo
7044 && (STMT_VINFO_RELEVANT_P (stmt_info)
7045 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7046 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7047 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7048 || vectorizable_shift (stmt, NULL, NULL, NULL)
7049 || vectorizable_operation (stmt, NULL, NULL, NULL)
7050 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7051 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7052 || vectorizable_call (stmt, NULL, NULL, NULL)
7053 || vectorizable_store (stmt, NULL, NULL, NULL)
7054 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7055 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7056 else
7058 if (bb_vinfo)
7059 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7060 || vectorizable_conversion (stmt, NULL, NULL, node)
7061 || vectorizable_shift (stmt, NULL, NULL, node)
7062 || vectorizable_operation (stmt, NULL, NULL, node)
7063 || vectorizable_assignment (stmt, NULL, NULL, node)
7064 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7065 || vectorizable_call (stmt, NULL, NULL, node)
7066 || vectorizable_store (stmt, NULL, NULL, node)
7067 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7070 if (!ok)
7072 if (dump_enabled_p ())
7074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7075 "not vectorized: relevant stmt not ");
7076 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7077 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7078 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7081 return false;
7084 if (bb_vinfo)
7085 return true;
7087 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7088 need extra handling, except for vectorizable reductions. */
7089 if (STMT_VINFO_LIVE_P (stmt_info)
7090 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7091 ok = vectorizable_live_operation (stmt, NULL, NULL);
7093 if (!ok)
7095 if (dump_enabled_p ())
7097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7098 "not vectorized: live stmt not ");
7099 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7100 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7101 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7104 return false;
7107 return true;
7111 /* Function vect_transform_stmt.
7113 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7115 bool
7116 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7117 bool *grouped_store, slp_tree slp_node,
7118 slp_instance slp_node_instance)
7120 bool is_store = false;
7121 gimple vec_stmt = NULL;
7122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7123 bool done;
7125 switch (STMT_VINFO_TYPE (stmt_info))
7127 case type_demotion_vec_info_type:
7128 case type_promotion_vec_info_type:
7129 case type_conversion_vec_info_type:
7130 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7131 gcc_assert (done);
7132 break;
7134 case induc_vec_info_type:
7135 gcc_assert (!slp_node);
7136 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7137 gcc_assert (done);
7138 break;
7140 case shift_vec_info_type:
7141 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7142 gcc_assert (done);
7143 break;
7145 case op_vec_info_type:
7146 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7147 gcc_assert (done);
7148 break;
7150 case assignment_vec_info_type:
7151 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7152 gcc_assert (done);
7153 break;
7155 case load_vec_info_type:
7156 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7157 slp_node_instance);
7158 gcc_assert (done);
7159 break;
7161 case store_vec_info_type:
7162 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7163 gcc_assert (done);
7164 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7166 /* In case of interleaving, the whole chain is vectorized when the
7167 last store in the chain is reached. Store stmts before the last
7168 one are skipped, and their vec_stmt_info shouldn't be freed
7169 meanwhile. */
7170 *grouped_store = true;
7171 if (STMT_VINFO_VEC_STMT (stmt_info))
7172 is_store = true;
7174 else
7175 is_store = true;
7176 break;
7178 case condition_vec_info_type:
7179 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7180 gcc_assert (done);
7181 break;
7183 case call_vec_info_type:
7184 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7185 stmt = gsi_stmt (*gsi);
7186 if (is_gimple_call (stmt)
7187 && gimple_call_internal_p (stmt)
7188 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7189 is_store = true;
7190 break;
7192 case call_simd_clone_vec_info_type:
7193 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7194 stmt = gsi_stmt (*gsi);
7195 break;
7197 case reduc_vec_info_type:
7198 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7199 gcc_assert (done);
7200 break;
7202 default:
7203 if (!STMT_VINFO_LIVE_P (stmt_info))
7205 if (dump_enabled_p ())
7206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7207 "stmt not supported.\n");
7208 gcc_unreachable ();
7212 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7213 is being vectorized, but outside the immediately enclosing loop. */
7214 if (vec_stmt
7215 && STMT_VINFO_LOOP_VINFO (stmt_info)
7216 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7217 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7218 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7219 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7220 || STMT_VINFO_RELEVANT (stmt_info) ==
7221 vect_used_in_outer_by_reduction))
7223 struct loop *innerloop = LOOP_VINFO_LOOP (
7224 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7225 imm_use_iterator imm_iter;
7226 use_operand_p use_p;
7227 tree scalar_dest;
7228 gimple exit_phi;
7230 if (dump_enabled_p ())
7231 dump_printf_loc (MSG_NOTE, vect_location,
7232 "Record the vdef for outer-loop vectorization.\n");
7234 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7235 (to be used when vectorizing outer-loop stmts that use the DEF of
7236 STMT). */
7237 if (gimple_code (stmt) == GIMPLE_PHI)
7238 scalar_dest = PHI_RESULT (stmt);
7239 else
7240 scalar_dest = gimple_assign_lhs (stmt);
7242 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7244 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7246 exit_phi = USE_STMT (use_p);
7247 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7252 /* Handle stmts whose DEF is used outside the loop-nest that is
7253 being vectorized. */
7254 if (STMT_VINFO_LIVE_P (stmt_info)
7255 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7257 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7258 gcc_assert (done);
7261 if (vec_stmt)
7262 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7264 return is_store;
7268 /* Remove a group of stores (for SLP or interleaving), free their
7269 stmt_vec_info. */
7271 void
7272 vect_remove_stores (gimple first_stmt)
7274 gimple next = first_stmt;
7275 gimple tmp;
7276 gimple_stmt_iterator next_si;
7278 while (next)
7280 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7282 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7283 if (is_pattern_stmt_p (stmt_info))
7284 next = STMT_VINFO_RELATED_STMT (stmt_info);
7285 /* Free the attached stmt_vec_info and remove the stmt. */
7286 next_si = gsi_for_stmt (next);
7287 unlink_stmt_vdef (next);
7288 gsi_remove (&next_si, true);
7289 release_defs (next);
7290 free_stmt_vec_info (next);
7291 next = tmp;
7296 /* Function new_stmt_vec_info.
7298 Create and initialize a new stmt_vec_info struct for STMT. */
7300 stmt_vec_info
7301 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7302 bb_vec_info bb_vinfo)
7304 stmt_vec_info res;
7305 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7307 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7308 STMT_VINFO_STMT (res) = stmt;
7309 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7310 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7311 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7312 STMT_VINFO_LIVE_P (res) = false;
7313 STMT_VINFO_VECTYPE (res) = NULL;
7314 STMT_VINFO_VEC_STMT (res) = NULL;
7315 STMT_VINFO_VECTORIZABLE (res) = true;
7316 STMT_VINFO_IN_PATTERN_P (res) = false;
7317 STMT_VINFO_RELATED_STMT (res) = NULL;
7318 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7319 STMT_VINFO_DATA_REF (res) = NULL;
7321 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7322 STMT_VINFO_DR_OFFSET (res) = NULL;
7323 STMT_VINFO_DR_INIT (res) = NULL;
7324 STMT_VINFO_DR_STEP (res) = NULL;
7325 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7327 if (gimple_code (stmt) == GIMPLE_PHI
7328 && is_loop_header_bb_p (gimple_bb (stmt)))
7329 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7330 else
7331 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7333 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7334 STMT_SLP_TYPE (res) = loop_vect;
7335 GROUP_FIRST_ELEMENT (res) = NULL;
7336 GROUP_NEXT_ELEMENT (res) = NULL;
7337 GROUP_SIZE (res) = 0;
7338 GROUP_STORE_COUNT (res) = 0;
7339 GROUP_GAP (res) = 0;
7340 GROUP_SAME_DR_STMT (res) = NULL;
7342 return res;
7346 /* Create a vector for stmt_vec_info structs. */
7348 void
7349 init_stmt_vec_info_vec (void)
7351 gcc_assert (!stmt_vec_info_vec.exists ());
7352 stmt_vec_info_vec.create (50);
7356 /* Free the vector of stmt_vec_info structs. */
7358 void
7359 free_stmt_vec_info_vec (void)
7361 unsigned int i;
7362 vec_void_p info;
7363 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7364 if (info != NULL)
7365 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7366 gcc_assert (stmt_vec_info_vec.exists ());
7367 stmt_vec_info_vec.release ();
7371 /* Free stmt vectorization related info. */
7373 void
7374 free_stmt_vec_info (gimple stmt)
7376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7378 if (!stmt_info)
7379 return;
7381 /* Check if this statement has a related "pattern stmt"
7382 (introduced by the vectorizer during the pattern recognition
7383 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7384 too. */
7385 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7387 stmt_vec_info patt_info
7388 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7389 if (patt_info)
7391 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7392 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7393 gimple_set_bb (patt_stmt, NULL);
7394 tree lhs = gimple_get_lhs (patt_stmt);
7395 if (TREE_CODE (lhs) == SSA_NAME)
7396 release_ssa_name (lhs);
7397 if (seq)
7399 gimple_stmt_iterator si;
7400 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7402 gimple seq_stmt = gsi_stmt (si);
7403 gimple_set_bb (seq_stmt, NULL);
7404 lhs = gimple_get_lhs (patt_stmt);
7405 if (TREE_CODE (lhs) == SSA_NAME)
7406 release_ssa_name (lhs);
7407 free_stmt_vec_info (seq_stmt);
7410 free_stmt_vec_info (patt_stmt);
7414 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7415 set_vinfo_for_stmt (stmt, NULL);
7416 free (stmt_info);
7420 /* Function get_vectype_for_scalar_type_and_size.
7422 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7423 by the target. */
7425 static tree
7426 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7428 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
7429 enum machine_mode simd_mode;
7430 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7431 int nunits;
7432 tree vectype;
7434 if (nbytes == 0)
7435 return NULL_TREE;
7437 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7438 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7439 return NULL_TREE;
7441 /* For vector types of elements whose mode precision doesn't
7442 match their type's precision we use an element type of mode
7443 precision. The vectorization routines will have to make sure
7444 they support the proper result truncation/extension.
7445 We also make sure to build vector types with INTEGER_TYPE
7446 component type only. */
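 /* Added example (not part of the original comment): a 3-bit
    bit-field with QImode has TYPE_PRECISION 3 but mode precision 8,
    so an 8-bit integer type is used for the vector element instead.  */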
7447 if (INTEGRAL_TYPE_P (scalar_type)
7448 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7449 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7450 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7451 TYPE_UNSIGNED (scalar_type));
7453 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7454 When the component mode passes the above test simply use a type
7455 corresponding to that mode. The theory is that any use that
7456 would cause problems with this will disable vectorization anyway. */
7457 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7458 && !INTEGRAL_TYPE_P (scalar_type))
7459 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7461 /* We can't build a vector type of elements with alignment bigger than
7462 their size. */
7463 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7464 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7465 TYPE_UNSIGNED (scalar_type));
7467 /* If we fell back to using the mode, fail if there was
7468 no scalar type for it. */
7469 if (scalar_type == NULL_TREE)
7470 return NULL_TREE;
7472 /* If no size was supplied use the mode the target prefers. Otherwise
7473 lookup a vector mode of the specified size. */
7474 if (size == 0)
7475 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7476 else
7477 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7478 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7479 if (nunits <= 1)
7480 return NULL_TREE;
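 /* Added example (illustration only): for a 4-byte int and a requested
    size of 16 bytes this yields nunits == 4 and a V4SI type below;
    with size == 0 the target's preferred SIMD mode decides instead.  */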
7482 vectype = build_vector_type (scalar_type, nunits);
7484 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7485 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7486 return NULL_TREE;
7488 return vectype;
7491 unsigned int current_vector_size;
7493 /* Function get_vectype_for_scalar_type.
7495 Returns the vector type corresponding to SCALAR_TYPE as supported
7496 by the target. */
7498 tree
7499 get_vectype_for_scalar_type (tree scalar_type)
7501 tree vectype;
7502 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7503 current_vector_size);
7504 if (vectype
7505 && current_vector_size == 0)
7506 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7507 return vectype;
7510 /* Function get_same_sized_vectype
7512 Returns a vector type corresponding to SCALAR_TYPE of size
7513 VECTOR_TYPE if supported by the target. */
7515 tree
7516 get_same_sized_vectype (tree scalar_type, tree vector_type)
7518 return get_vectype_for_scalar_type_and_size
7519 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7522 /* Function vect_is_simple_use.
7524 Input:
7525 LOOP_VINFO - the vect info of the loop that is being vectorized.
7526 BB_VINFO - the vect info of the basic block that is being vectorized.
7527 OPERAND - operand of STMT in the loop or bb.
7528 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7530 Returns whether a stmt with OPERAND can be vectorized.
7531 For loops, supportable operands are constants, loop invariants, and operands
7532 that are defined by the current iteration of the loop. Unsupportable
7533 operands are those that are defined by a previous iteration of the loop (as
7534 is the case in reduction/induction computations).
7535 For basic blocks, supportable operands are constants and bb invariants.
7536 For now, operands defined outside the basic block are not supported. */
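 /* Added example (not from the original comment): in
      for (i = 0; i < n; i++) { x = a[i]; y = x + c; }
    the use of 'x' in the statement computing 'y' is a
    vect_internal_def, 'c' defined before the loop is a
    vect_external_def, and a literal constant operand is a
    vect_constant_def.  */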
7538 bool
7539 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7540 bb_vec_info bb_vinfo, gimple *def_stmt,
7541 tree *def, enum vect_def_type *dt)
7543 basic_block bb;
7544 stmt_vec_info stmt_vinfo;
7545 struct loop *loop = NULL;
7547 if (loop_vinfo)
7548 loop = LOOP_VINFO_LOOP (loop_vinfo);
7550 *def_stmt = NULL;
7551 *def = NULL_TREE;
7553 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_NOTE, vect_location,
7556 "vect_is_simple_use: operand ");
7557 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7558 dump_printf (MSG_NOTE, "\n");
7561 if (CONSTANT_CLASS_P (operand))
7563 *dt = vect_constant_def;
7564 return true;
7567 if (is_gimple_min_invariant (operand))
7569 *def = operand;
7570 *dt = vect_external_def;
7571 return true;
7574 if (TREE_CODE (operand) == PAREN_EXPR)
7576 if (dump_enabled_p ())
7577 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7578 operand = TREE_OPERAND (operand, 0);
7581 if (TREE_CODE (operand) != SSA_NAME)
7583 if (dump_enabled_p ())
7584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7585 "not ssa-name.\n");
7586 return false;
7589 *def_stmt = SSA_NAME_DEF_STMT (operand);
7590 if (*def_stmt == NULL)
7592 if (dump_enabled_p ())
7593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7594 "no def_stmt.\n");
7595 return false;
7598 if (dump_enabled_p ())
7600 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7601 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7602 dump_printf (MSG_NOTE, "\n");
7605 /* An empty stmt is expected only in the case of a function argument
7606 (otherwise we expect a GIMPLE_PHI or a GIMPLE_ASSIGN). */
7607 if (gimple_nop_p (*def_stmt))
7609 *def = operand;
7610 *dt = vect_external_def;
7611 return true;
7614 bb = gimple_bb (*def_stmt);
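 /* Classify defs that do not come from inside the region being
    vectorized (outside the loop, or, for basic-block vectorization,
    outside the block or reaching it through a PHI) as external;
    everything else gets the def type recorded in its stmt_vec_info.  */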
7616 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7617 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7618 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7619 *dt = vect_external_def;
7620 else
7622 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7623 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7626 if (*dt == vect_unknown_def_type
7627 || (stmt
7628 && *dt == vect_double_reduction_def
7629 && gimple_code (stmt) != GIMPLE_PHI))
7631 if (dump_enabled_p ())
7632 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7633 "Unsupported pattern.\n");
7634 return false;
7637 if (dump_enabled_p ())
7638 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7640 switch (gimple_code (*def_stmt))
7642 case GIMPLE_PHI:
7643 *def = gimple_phi_result (*def_stmt);
7644 break;
7646 case GIMPLE_ASSIGN:
7647 *def = gimple_assign_lhs (*def_stmt);
7648 break;
7650 case GIMPLE_CALL:
7651 *def = gimple_call_lhs (*def_stmt);
7652 if (*def != NULL)
7653 break;
7654 /* FALLTHRU */
7655 default:
7656 if (dump_enabled_p ())
7657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7658 "unsupported defining stmt:\n");
7659 return false;
7662 return true;
7665 /* Function vect_is_simple_use_1.
7667 Same as vect_is_simple_use but also determines the vector operand
7668 type of OPERAND and stores it to *VECTYPE. If the definition of
7669 OPERAND is vect_uninitialized_def, vect_constant_def or
7670 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7671 is responsible for computing the best suited vector type for the
7672 scalar operand. */
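 /* A typical analysis-time call, sketched here only for illustration
    (OP0 stands for whichever operand of STMT is being checked), looks
    roughly like:

      gimple def_stmt;
      tree def, vectype_in;
      enum vect_def_type dt;
      if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt, &vectype_in))
        return false;

    On success VECTYPE_IN is the vector type of an internal def and
    NULL_TREE for constant or external defs, as described above.  */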
7674 bool
7675 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7676 bb_vec_info bb_vinfo, gimple *def_stmt,
7677 tree *def, enum vect_def_type *dt, tree *vectype)
7679 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7680 def, dt))
7681 return false;
7683 /* Now get a vector type if the def is internal, otherwise supply
7684 NULL_TREE and leave it up to the caller to figure out a proper
7685 type for the use stmt. */
7686 if (*dt == vect_internal_def
7687 || *dt == vect_induction_def
7688 || *dt == vect_reduction_def
7689 || *dt == vect_double_reduction_def
7690 || *dt == vect_nested_cycle)
7692 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7694 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7695 && !STMT_VINFO_RELEVANT (stmt_info)
7696 && !STMT_VINFO_LIVE_P (stmt_info))
7697 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7699 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7700 gcc_assert (*vectype != NULL_TREE);
7702 else if (*dt == vect_uninitialized_def
7703 || *dt == vect_constant_def
7704 || *dt == vect_external_def)
7705 *vectype = NULL_TREE;
7706 else
7707 gcc_unreachable ();
7709 return true;
7713 /* Function supportable_widening_operation
7715 Check whether an operation represented by the code CODE is a
7716 widening operation that is supported by the target platform in
7717 vector form (i.e., when operating on arguments of type VECTYPE_IN
7718 producing a result of type VECTYPE_OUT).
7720 Widening operations we currently support are NOP (CONVERT), FLOAT,
7721 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
7722 are supported by the target platform either directly (via vector
7723 tree-codes), or via target builtins.
7725 Output:
7726 - CODE1 and CODE2 are codes of vector operations to be used when
7727 vectorizing the operation, if available.
7728 - MULTI_STEP_CVT determines the number of required intermediate steps in
7729 case of multi-step conversion (like char->short->int - in that case
7730 MULTI_STEP_CVT will be 1).
7731 - INTERM_TYPES contains the intermediate type required to perform the
7732 widening operation (short in the above example). */
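 /* Concretely, widening a char operand to an int result on a target with
    128-bit vectors would typically go V16QI -> V8HI -> V4SI: the unpack
    to V8HI is the one intermediate step, so MULTI_STEP_CVT is 1 and
    INTERM_TYPES holds the V8HI intermediate vector type, while
    CODE1/CODE2 name the lo/hi (or even/odd) halves of each step.  */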
7734 bool
7735 supportable_widening_operation (enum tree_code code, gimple stmt,
7736 tree vectype_out, tree vectype_in,
7737 enum tree_code *code1, enum tree_code *code2,
7738 int *multi_step_cvt,
7739 vec<tree> *interm_types)
7741 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7742 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7743 struct loop *vect_loop = NULL;
7744 enum machine_mode vec_mode;
7745 enum insn_code icode1, icode2;
7746 optab optab1, optab2;
7747 tree vectype = vectype_in;
7748 tree wide_vectype = vectype_out;
7749 enum tree_code c1, c2;
7750 int i;
7751 tree prev_type, intermediate_type;
7752 enum machine_mode intermediate_mode, prev_mode;
7753 optab optab3, optab4;
7755 *multi_step_cvt = 0;
7756 if (loop_info)
7757 vect_loop = LOOP_VINFO_LOOP (loop_info);
7759 switch (code)
7761 case WIDEN_MULT_EXPR:
7762 /* The result of a vectorized widening operation usually requires
7763 two vectors (because the widened results do not fit into one vector).
7764 The generated vector results would normally be expected to be
7765 generated in the same order as in the original scalar computation,
7766 i.e. if 8 results are generated in each vector iteration, they are
7767 to be organized as follows:
7768 vect1: [res1,res2,res3,res4],
7769 vect2: [res5,res6,res7,res8].
7771 However, in the special case that the result of the widening
7772 operation is used in a reduction computation only, the order doesn't
7773 matter (because when vectorizing a reduction we change the order of
7774 the computation). Some targets can take advantage of this and
7775 generate more efficient code. For example, targets like Altivec,
7776 that support widen_mult using a sequence of {mult_even,mult_odd}
7777 generate the following vectors:
7778 vect1: [res1,res3,res5,res7],
7779 vect2: [res2,res4,res6,res8].
7781 When vectorizing outer-loops, we execute the inner-loop sequentially
7782 (each vectorized inner-loop iteration contributes to VF outer-loop
7783 iterations in parallel). We therefore don't allow changing the
7784 order of the computation in the inner-loop during outer-loop
7785 vectorization. */
7786 /* TODO: Another case in which order doesn't *really* matter is when we
7787 widen and then contract again, e.g. (short)((int)x * y >> 8).
7788 Normally, pack_trunc performs an even/odd permute, whereas the
7789 repack from an even/odd expansion would be an interleave, which
7790 would be significantly simpler for e.g. AVX2. */
7791 /* In any case, in order to avoid duplicating the code below, recurse
7792 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7793 are properly set up for the caller. If we fail, we'll continue with
7794 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7795 if (vect_loop
7796 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7797 && !nested_in_vect_loop_p (vect_loop, stmt)
7798 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7799 stmt, vectype_out, vectype_in,
7800 code1, code2, multi_step_cvt,
7801 interm_types))
7802 return true;
7803 c1 = VEC_WIDEN_MULT_LO_EXPR;
7804 c2 = VEC_WIDEN_MULT_HI_EXPR;
7805 break;
7807 case VEC_WIDEN_MULT_EVEN_EXPR:
7808 /* Support the recursion induced just above. */
7809 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7810 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7811 break;
7813 case WIDEN_LSHIFT_EXPR:
7814 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7815 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7816 break;
7818 CASE_CONVERT:
7819 c1 = VEC_UNPACK_LO_EXPR;
7820 c2 = VEC_UNPACK_HI_EXPR;
7821 break;
7823 case FLOAT_EXPR:
7824 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7825 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7826 break;
7828 case FIX_TRUNC_EXPR:
7829 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7830 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7831 computing the operation. */
7832 return false;
7834 default:
7835 gcc_unreachable ();
7838 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7840 enum tree_code ctmp = c1;
7841 c1 = c2;
7842 c2 = ctmp;
7845 if (code == FIX_TRUNC_EXPR)
7848 /* The signedness is determined from the output operand. */
7848 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7849 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7851 else
7853 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7854 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7857 if (!optab1 || !optab2)
7858 return false;
7860 vec_mode = TYPE_MODE (vectype);
7861 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7862 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7863 return false;
7865 *code1 = c1;
7866 *code2 = c2;
7868 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7869 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7870 return true;
7872 /* Check if it's a multi-step conversion that can be done using intermediate
7873 types. */
7875 prev_type = vectype;
7876 prev_mode = vec_mode;
7878 if (!CONVERT_EXPR_CODE_P (code))
7879 return false;
7881 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7882 intermediate steps in the promotion sequence. We try
7883 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7884 not. */
7885 interm_types->create (MAX_INTERM_CVT_STEPS);
7886 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7888 intermediate_mode = insn_data[icode1].operand[0].mode;
7889 intermediate_type
7890 = lang_hooks.types.type_for_mode (intermediate_mode,
7891 TYPE_UNSIGNED (prev_type));
7892 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7893 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7895 if (!optab3 || !optab4
7896 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7897 || insn_data[icode1].operand[0].mode != intermediate_mode
7898 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7899 || insn_data[icode2].operand[0].mode != intermediate_mode
7900 || ((icode1 = optab_handler (optab3, intermediate_mode))
7901 == CODE_FOR_nothing)
7902 || ((icode2 = optab_handler (optab4, intermediate_mode))
7903 == CODE_FOR_nothing))
7904 break;
7906 interm_types->quick_push (intermediate_type);
7907 (*multi_step_cvt)++;
7909 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7910 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7911 return true;
7913 prev_type = intermediate_type;
7914 prev_mode = intermediate_mode;
7917 interm_types->release ();
7918 return false;
7922 /* Function supportable_narrowing_operation
7924 Check whether an operation represented by the code CODE is a
7925 narrowing operation that is supported by the target platform in
7926 vector form (i.e., when operating on arguments of type VECTYPE_IN
7927 and producing a result of type VECTYPE_OUT).
7929 Narrowing operations we currently support are NOP (CONVERT) and
7930 FIX_TRUNC. This function checks if these operations are supported by
7931 the target platform directly via vector tree-codes.
7933 Output:
7934 - CODE1 is the code of a vector operation to be used when
7935 vectorizing the operation, if available.
7936 - MULTI_STEP_CVT determines the number of required intermediate steps in
7937 case of multi-step conversion (like int->short->char - in that case
7938 MULTI_STEP_CVT will be 1).
7939 - INTERM_TYPES contains the intermediate type required to perform the
7940 narrowing operation (short in the above example). */
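 /* Concretely, narrowing int operands to a char result on a target with
    128-bit vectors would typically go V4SI -> V8HI -> V16QI: pairs of
    V4SI vectors are first packed into V8HI vectors (the one intermediate
    step, so MULTI_STEP_CVT is 1 and INTERM_TYPES holds the V8HI type)
    and those are packed again into the final V16QI result.  */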
7942 bool
7943 supportable_narrowing_operation (enum tree_code code,
7944 tree vectype_out, tree vectype_in,
7945 enum tree_code *code1, int *multi_step_cvt,
7946 vec<tree> *interm_types)
7948 enum machine_mode vec_mode;
7949 enum insn_code icode1;
7950 optab optab1, interm_optab;
7951 tree vectype = vectype_in;
7952 tree narrow_vectype = vectype_out;
7953 enum tree_code c1;
7954 tree intermediate_type;
7955 enum machine_mode intermediate_mode, prev_mode;
7956 int i;
7957 bool uns;
7959 *multi_step_cvt = 0;
7960 switch (code)
7962 CASE_CONVERT:
7963 c1 = VEC_PACK_TRUNC_EXPR;
7964 break;
7966 case FIX_TRUNC_EXPR:
7967 c1 = VEC_PACK_FIX_TRUNC_EXPR;
7968 break;
7970 case FLOAT_EXPR:
7971 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
7972 tree code and optabs used for computing the operation. */
7973 return false;
7975 default:
7976 gcc_unreachable ();
7979 if (code == FIX_TRUNC_EXPR)
7980 /* The signedness is determined from the output operand. */
7981 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7982 else
7983 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7985 if (!optab1)
7986 return false;
7988 vec_mode = TYPE_MODE (vectype);
7989 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
7990 return false;
7992 *code1 = c1;
7994 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
7995 return true;
7997 /* Check if it's a multi-step conversion that can be done using intermediate
7998 types. */
7999 prev_mode = vec_mode;
8000 if (code == FIX_TRUNC_EXPR)
8001 uns = TYPE_UNSIGNED (vectype_out);
8002 else
8003 uns = TYPE_UNSIGNED (vectype);
8005 /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to integer
8006 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
8007 costly than signed. */
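 /* For example, when narrowing double to unsigned short, every value
    that fits in the final unsigned result is also representable in the
    signed intermediate type, so the signed conversion can safely (and
    usually more cheaply) be used for the intermediate steps.  */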
8008 if (code == FIX_TRUNC_EXPR && uns)
8010 enum insn_code icode2;
8012 intermediate_type
8013 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8014 interm_optab
8015 = optab_for_tree_code (c1, intermediate_type, optab_default);
8016 if (interm_optab != unknown_optab
8017 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8018 && insn_data[icode1].operand[0].mode
8019 == insn_data[icode2].operand[0].mode)
8021 uns = false;
8022 optab1 = interm_optab;
8023 icode1 = icode2;
8027 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8028 intermediate steps in the narrowing sequence. We try
8029 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8030 interm_types->create (MAX_INTERM_CVT_STEPS);
8031 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8033 intermediate_mode = insn_data[icode1].operand[0].mode;
8034 intermediate_type
8035 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8036 interm_optab
8037 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8038 optab_default);
8039 if (!interm_optab
8040 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8041 || insn_data[icode1].operand[0].mode != intermediate_mode
8042 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8043 == CODE_FOR_nothing))
8044 break;
8046 interm_types->quick_push (intermediate_type);
8047 (*multi_step_cvt)++;
8049 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8050 return true;
8052 prev_mode = intermediate_mode;
8053 optab1 = interm_optab;
8056 interm_types->release ();
8057 return false;