2014-03-14 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-vect-stmts.c
blob: 70fb411f8f5443ab89115be5998da1fb227beb9b
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-ssa-alias.h"
33 #include "internal-fn.h"
34 #include "tree-eh.h"
35 #include "gimple-expr.h"
36 #include "is-a.h"
37 #include "gimple.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "gimple-ssa.h"
42 #include "tree-cfg.h"
43 #include "tree-phinodes.h"
44 #include "ssa-iterators.h"
45 #include "stringpool.h"
46 #include "tree-ssanames.h"
47 #include "tree-ssa-loop-manip.h"
48 #include "cfgloop.h"
49 #include "tree-ssa-loop.h"
50 #include "tree-scalar-evolution.h"
51 #include "expr.h"
52 #include "recog.h" /* FIXME: for insn_data */
53 #include "optabs.h"
54 #include "diagnostic-core.h"
55 #include "tree-vectorizer.h"
56 #include "dumpfile.h"
57 #include "cgraph.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (struct _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
75 gimple stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 struct loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
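/* For instance, when the outer loop of a loop nest is the one being
   vectorized (outer-loop vectorization), the stmts of its inner loop
   satisfy this predicate; for plain single-loop vectorization
   LOOP->inner is NULL and the predicate is always false.  */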
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
97 if (body_cost_vec)
99 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
100 add_stmt_info_to_vec (body_cost_vec, count, kind,
101 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
102 misalign);
103 return (unsigned)
104 (builtin_vectorization_cost (kind, vectype, misalign) * count);
107 else
109 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
110 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
111 void *target_cost_data;
113 if (loop_vinfo)
114 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
115 else
116 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
118 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
119 misalign, where);
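/* A typical use (sketch): during analysis the caller owns the cost
   vectors and passes one of them, e.g.

     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                        stmt_info, 0, vect_prologue);

   which appends an entry to the vector and returns the generic
   builtin_vectorization_cost estimate; passing a NULL vector instead
   hands the cost straight to the target via add_stmt_cost.  */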
123 /* Return a variable of type ELEM_TYPE[NELEMS]. */
125 static tree
126 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
128 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
129 "vect_array");
132 /* ARRAY is an array of vectors created by create_vector_array.
133 Return an SSA_NAME for the vector in index N. The reference
134 is part of the vectorization of STMT and the vector is associated
135 with scalar destination SCALAR_DEST. */
137 static tree
138 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
139 tree array, unsigned HOST_WIDE_INT n)
141 tree vect_type, vect, vect_name, array_ref;
142 gimple new_stmt;
144 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
145 vect_type = TREE_TYPE (TREE_TYPE (array));
146 vect = vect_create_destination_var (scalar_dest, vect_type);
147 array_ref = build4 (ARRAY_REF, vect_type, array,
148 build_int_cst (size_type_node, n),
149 NULL_TREE, NULL_TREE);
151 new_stmt = gimple_build_assign (vect, array_ref);
152 vect_name = make_ssa_name (vect, new_stmt);
153 gimple_assign_set_lhs (new_stmt, vect_name);
154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
156 return vect_name;
159 /* ARRAY is an array of vectors created by create_vector_array.
160 Emit code to store SSA_NAME VECT in index N of the array.
161 The store is part of the vectorization of STMT. */
163 static void
164 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
165 tree array, unsigned HOST_WIDE_INT n)
167 tree array_ref;
168 gimple new_stmt;
170 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (array_ref, vect);
175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
178 /* PTR is a pointer to an array of type TYPE. Return a representation
179 of *PTR. The memory reference replaces those in FIRST_DR
180 (and its group). */
182 static tree
183 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
185 tree mem_ref, alias_ptr_type;
187 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
188 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
189 /* Arrays have the same alignment as their type. */
190 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
191 return mem_ref;
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
200 static void
201 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
202 enum vect_relevant relevant, bool live_p,
203 bool used_in_pattern)
205 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
208 gimple pattern_stmt;
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE, vect_location,
212 "mark relevant %d, live %d.\n", relevant, live_p);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 bool found = false;
221 if (!used_in_pattern)
223 imm_use_iterator imm_iter;
224 use_operand_p use_p;
225 gimple use_stmt;
226 tree lhs;
227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
228 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
230 if (is_gimple_assign (stmt))
231 lhs = gimple_assign_lhs (stmt);
232 else
233 lhs = gimple_call_lhs (stmt);
235 /* This is a use outside the pattern; if LHS has other uses that are
236 pattern uses, we should mark the stmt itself, and not the pattern
237 stmt. */
238 if (lhs && TREE_CODE (lhs) == SSA_NAME)
239 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
241 if (is_gimple_debug (USE_STMT (use_p)))
242 continue;
243 use_stmt = USE_STMT (use_p);
245 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
246 continue;
248 if (vinfo_for_stmt (use_stmt)
249 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
251 found = true;
252 break;
257 if (!found)
259 /* This is the last stmt in a sequence that was detected as a
260 pattern that can potentially be vectorized. Don't mark the stmt
261 as relevant/live because it's not going to be vectorized.
262 Instead mark the pattern-stmt that replaces it. */
264 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_NOTE, vect_location,
268 "last stmt in pattern. don't mark"
269 " relevant/live.\n");
270 stmt_info = vinfo_for_stmt (pattern_stmt);
271 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
272 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
273 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
274 stmt = pattern_stmt;
278 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
279 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
280 STMT_VINFO_RELEVANT (stmt_info) = relevant;
282 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
283 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
285 if (dump_enabled_p ())
286 dump_printf_loc (MSG_NOTE, vect_location,
287 "already marked relevant/live.\n");
288 return;
291 worklist->safe_push (stmt);
295 /* Function vect_stmt_relevant_p.
297 Return true if STMT in loop that is represented by LOOP_VINFO is
298 "relevant for vectorization".
300 A stmt is considered "relevant for vectorization" if:
301 - it has uses outside the loop.
302 - it has vdefs (it alters memory).
303 - control stmts in the loop (except for the exit condition).
305 CHECKME: what other side effects would the vectorizer allow? */
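/* For example, in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     <-- has a vdef: relevant
         sum = sum + b[i];    <-- sum is read after the loop: live
       }

   the store is relevant because it alters memory, and the summation is
   live because its result is used outside the loop (through the
   loop-closed exit phi).  */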
307 static bool
308 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
309 enum vect_relevant *relevant, bool *live_p)
311 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
312 ssa_op_iter op_iter;
313 imm_use_iterator imm_iter;
314 use_operand_p use_p;
315 def_operand_p def_p;
317 *relevant = vect_unused_in_scope;
318 *live_p = false;
320 /* cond stmt other than loop exit cond. */
321 if (is_ctrl_stmt (stmt)
322 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
323 != loop_exit_ctrl_vec_info_type)
324 *relevant = vect_used_in_scope;
326 /* changing memory. */
327 if (gimple_code (stmt) != GIMPLE_PHI)
328 if (gimple_vdef (stmt))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: stmt has vdefs.\n");
333 *relevant = vect_used_in_scope;
336 /* uses outside the loop. */
337 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
339 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
341 basic_block bb = gimple_bb (USE_STMT (use_p));
342 if (!flow_bb_inside_loop_p (loop, bb))
344 if (dump_enabled_p ())
345 dump_printf_loc (MSG_NOTE, vect_location,
346 "vec_stmt_relevant_p: used out of loop.\n");
348 if (is_gimple_debug (USE_STMT (use_p)))
349 continue;
351 /* We expect all such uses to be in the loop exit phis
352 (because of loop closed form) */
353 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
354 gcc_assert (bb == single_exit (loop)->dest);
356 *live_p = true;
361 return (*live_p || *relevant);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT. Check if USE is
368 used in STMT for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
373 tree operand;
374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
376 /* USE corresponds to some operand in STMT. If there is no data
377 reference in STMT, then any operand that corresponds to USE
378 is not indexing an array. */
379 if (!STMT_VINFO_DATA_REF (stmt_info))
380 return true;
382 /* STMT has a data_ref. FORNOW this means that it's one of
383 the following forms:
384 -1- ARRAY_REF = var
385 -2- var = ARRAY_REF
386 (This should have been verified in analyze_data_refs).
388 'var' in the second case corresponds to a def, not a use,
389 so USE cannot correspond to any operands that are not used
390 for array indexing.
392 Therefore, all we need to check is if STMT falls into the
393 first case, and whether var corresponds to USE. */
395 if (!gimple_assign_copy_p (stmt))
397 if (is_gimple_call (stmt)
398 && gimple_call_internal_p (stmt))
399 switch (gimple_call_internal_fn (stmt))
401 case IFN_MASK_STORE:
402 operand = gimple_call_arg (stmt, 3);
403 if (operand == use)
404 return true;
405 /* FALLTHRU */
406 case IFN_MASK_LOAD:
407 operand = gimple_call_arg (stmt, 2);
408 if (operand == use)
409 return true;
410 break;
411 default:
412 break;
414 return false;
417 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
418 return false;
419 operand = gimple_assign_rhs1 (stmt);
420 if (TREE_CODE (operand) != SSA_NAME)
421 return false;
423 if (operand == use)
424 return true;
426 return false;
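/* Example: for the store  a[i_3] = x_1  the value operand x_1 makes this
   predicate true, while i_3, which appears only in the address
   computation, makes it false.  For the load form  x_2 = a[i_3]  every
   use is considered an indexing use and the predicate is false.  */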
430 /*
431 Function process_use.
433 Inputs:
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
439 be performed.
441 Outputs:
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
446 Exceptions:
447 - case 1: If USE is used only for address computations (e.g. array indexing),
448 which does not need to be directly vectorized, then the liveness/relevance
449 of the respective DEF_STMT is left unchanged.
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
451 skip DEF_STMT because it had already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
455 Return true if everything is as expected. Return false otherwise. */
457 static bool
458 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
459 enum vect_relevant relevant, vec<gimple> *worklist,
460 bool force)
462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
464 stmt_vec_info dstmt_vinfo;
465 basic_block bb, def_bb;
466 tree def;
467 gimple def_stmt;
468 enum vect_def_type dt;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
475 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
531 switch (relevant)
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
548 case vect_used_in_scope:
549 break;
551 default:
552 gcc_unreachable ();
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
569 switch (relevant)
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
577 case vect_used_by_reduction:
578 relevant = vect_used_in_outer_by_reduction;
579 break;
581 case vect_used_in_scope:
582 relevant = vect_used_in_outer;
583 break;
585 default:
586 gcc_unreachable ();
590 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
591 is_pattern_stmt_p (stmt_vinfo));
592 return true;
596 /* Function vect_mark_stmts_to_be_vectorized.
598 Not all stmts in the loop need to be vectorized. For example:
600 for i...
601 for j...
602 1. T0 = i + j
603 2. T1 = a[T0]
605 3. j = j + 1
607 Stmts 1 and 3 do not need to be vectorized, because loop control and
608 addressing of vectorized data-refs are handled differently.
610 This pass detects such stmts. */
612 bool
613 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
615 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
616 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
617 unsigned int nbbs = loop->num_nodes;
618 gimple_stmt_iterator si;
619 gimple stmt;
620 unsigned int i;
621 stmt_vec_info stmt_vinfo;
622 basic_block bb;
623 gimple phi;
624 bool live_p;
625 enum vect_relevant relevant, tmp_relevant;
626 enum vect_def_type def_type;
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE, vect_location,
630 "=== vect_mark_stmts_to_be_vectorized ===\n");
632 auto_vec<gimple, 64> worklist;
634 /* 1. Init worklist. */
635 for (i = 0; i < nbbs; i++)
637 bb = bbs[i];
638 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
640 phi = gsi_stmt (si);
641 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
644 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
645 dump_printf (MSG_NOTE, "\n");
648 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
649 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
651 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
653 stmt = gsi_stmt (si);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
657 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
658 dump_printf (MSG_NOTE, "\n");
661 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
662 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
666 /* 2. Process_worklist */
667 while (worklist.length () > 0)
669 use_operand_p use_p;
670 ssa_op_iter iter;
672 stmt = worklist.pop ();
673 if (dump_enabled_p ())
675 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
676 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
677 dump_printf (MSG_NOTE, "\n");
680 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
681 (DEF_STMT) as relevant/irrelevant and live/dead according to the
682 liveness and relevance properties of STMT. */
683 stmt_vinfo = vinfo_for_stmt (stmt);
684 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
685 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
687 /* Generally, the liveness and relevance properties of STMT are
688 propagated as is to the DEF_STMTs of its USEs:
689 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
690 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
692 One exception is when STMT has been identified as defining a reduction
693 variable; in this case we set the liveness/relevance as follows:
694 live_p = false
695 relevant = vect_used_by_reduction
696 This is because we distinguish between two kinds of relevant stmts -
697 those that are used by a reduction computation, and those that are
698 (also) used by a regular computation. This allows us later on to
699 identify stmts that are used solely by a reduction, and therefore the
700 order of the results that they produce does not have to be kept. */
702 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
703 tmp_relevant = relevant;
704 switch (def_type)
706 case vect_reduction_def:
707 switch (tmp_relevant)
709 case vect_unused_in_scope:
710 relevant = vect_used_by_reduction;
711 break;
713 case vect_used_by_reduction:
714 if (gimple_code (stmt) == GIMPLE_PHI)
715 break;
716 /* fall through */
718 default:
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of reduction.\n");
722 return false;
725 live_p = false;
726 break;
728 case vect_nested_cycle:
729 if (tmp_relevant != vect_unused_in_scope
730 && tmp_relevant != vect_used_in_outer_by_reduction
731 && tmp_relevant != vect_used_in_outer)
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735 "unsupported use of nested cycle.\n");
737 return false;
740 live_p = false;
741 break;
743 case vect_double_reduction_def:
744 if (tmp_relevant != vect_unused_in_scope
745 && tmp_relevant != vect_used_by_reduction)
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
749 "unsupported use of double reduction.\n");
751 return false;
754 live_p = false;
755 break;
757 default:
758 break;
761 if (is_pattern_stmt_p (stmt_vinfo))
763 /* Pattern statements are not inserted into the code, so
764 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
765 have to scan the RHS or function arguments instead. */
766 if (is_gimple_assign (stmt))
768 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
769 tree op = gimple_assign_rhs1 (stmt);
771 i = 1;
772 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
774 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
775 live_p, relevant, &worklist, false)
776 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
777 live_p, relevant, &worklist, false))
778 return false;
779 i = 2;
781 for (; i < gimple_num_ops (stmt); i++)
783 op = gimple_op (stmt, i);
784 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
785 &worklist, false))
786 return false;
789 else if (is_gimple_call (stmt))
791 for (i = 0; i < gimple_call_num_args (stmt); i++)
793 tree arg = gimple_call_arg (stmt, i);
794 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
795 &worklist, false))
796 return false;
800 else
801 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
803 tree op = USE_FROM_PTR (use_p);
804 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
805 &worklist, false))
806 return false;
809 if (STMT_VINFO_GATHER_P (stmt_vinfo))
811 tree off;
812 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
813 gcc_assert (decl);
814 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
815 &worklist, true))
816 return false;
818 } /* while worklist */
820 return true;
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
830 void
831 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
832 enum vect_def_type *dt,
833 stmt_vector_for_cost *prologue_cost_vec,
834 stmt_vector_for_cost *body_cost_vec)
836 int i;
837 int inside_cost = 0, prologue_cost = 0;
839 /* The SLP costs were already calculated during SLP tree build. */
840 if (PURE_SLP_STMT (stmt_info))
841 return;
843 /* FORNOW: Assuming maximum 2 args per stmts. */
844 for (i = 0; i < 2; i++)
845 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
846 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
847 stmt_info, 0, vect_prologue);
849 /* Pass the inside-of-loop statements to the target-specific cost model. */
850 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
851 stmt_info, 0, vect_body);
853 if (dump_enabled_p ())
854 dump_printf_loc (MSG_NOTE, vect_location,
855 "vect_model_simple_cost: inside_cost = %d, "
856 "prologue_cost = %d .\n", inside_cost, prologue_cost);
860 /* Model cost for type demotion and promotion operations. PWR is normally
861 zero for single-step promotions and demotions. It will be one if
862 two-step promotion/demotion is required, and so on. Each additional
863 step doubles the number of instructions required. */
865 static void
866 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
867 enum vect_def_type *dt, int pwr)
869 int i, tmp;
870 int inside_cost = 0, prologue_cost = 0;
871 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
872 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
873 void *target_cost_data;
875 /* The SLP costs were already calculated during SLP tree build. */
876 if (PURE_SLP_STMT (stmt_info))
877 return;
879 if (loop_vinfo)
880 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
881 else
882 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
884 for (i = 0; i < pwr + 1; i++)
886 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
887 (i + 1) : i;
888 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
889 vec_promote_demote, stmt_info, 0,
890 vect_body);
893 /* FORNOW: Assuming maximum 2 args per stmts. */
894 for (i = 0; i < 2; i++)
895 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
896 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
897 stmt_info, 0, vect_prologue);
899 if (dump_enabled_p ())
900 dump_printf_loc (MSG_NOTE, vect_location,
901 "vect_model_promotion_demotion_cost: inside_cost = %d, "
902 "prologue_cost = %d .\n", inside_cost, prologue_cost);
905 /* Function vect_cost_group_size
907 For grouped load or store, return the group_size only if it is the first
908 load or store of a group, else return 1. This ensures that group size is
909 only returned once per group. */
911 static int
912 vect_cost_group_size (stmt_vec_info stmt_info)
914 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
916 if (first_stmt == STMT_VINFO_STMT (stmt_info))
917 return GROUP_SIZE (stmt_info);
919 return 1;
923 /* Function vect_model_store_cost
925 Models cost for stores. In the case of grouped accesses, one access
926 has the overhead of the grouped access attributed to it. */
928 void
929 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
930 bool store_lanes_p, enum vect_def_type dt,
931 slp_tree slp_node,
932 stmt_vector_for_cost *prologue_cost_vec,
933 stmt_vector_for_cost *body_cost_vec)
935 int group_size;
936 unsigned int inside_cost = 0, prologue_cost = 0;
937 struct data_reference *first_dr;
938 gimple first_stmt;
940 /* The SLP costs were already calculated during SLP tree build. */
941 if (PURE_SLP_STMT (stmt_info))
942 return;
944 if (dt == vect_constant_def || dt == vect_external_def)
945 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
946 stmt_info, 0, vect_prologue);
948 /* Grouped access? */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
951 if (slp_node)
953 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
954 group_size = 1;
956 else
958 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
959 group_size = vect_cost_group_size (stmt_info);
962 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
964 /* Not a grouped access. */
965 else
967 group_size = 1;
968 first_dr = STMT_VINFO_DATA_REF (stmt_info);
971 /* We assume that the cost of a single store-lanes instruction is
972 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
973 access is instead being provided by a permute-and-store operation,
974 include the cost of the permutes. */
975 if (!store_lanes_p && group_size > 1)
977 /* Uses a high and low interleave operation for each needed permute. */
979 int nstmts = ncopies * exact_log2 (group_size) * group_size;
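/* E.g. for group_size == 4 and ncopies == 1 this is
   1 * exact_log2 (4) * 4 == 8 vec_perm stmts: exact_log2 (group_size)
   interleave levels with group_size permutes per level.  */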
980 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
981 stmt_info, 0, vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: strided group_size = %d .\n",
986 group_size);
989 /* Costs of the stores. */
990 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_NOTE, vect_location,
994 "vect_model_store_cost: inside_cost = %d, "
995 "prologue_cost = %d .\n", inside_cost, prologue_cost);
999 /* Calculate cost of DR's memory access. */
1000 void
1001 vect_get_store_cost (struct data_reference *dr, int ncopies,
1002 unsigned int *inside_cost,
1003 stmt_vector_for_cost *body_cost_vec)
1005 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1006 gimple stmt = DR_STMT (dr);
1007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1009 switch (alignment_support_scheme)
1011 case dr_aligned:
1013 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1014 vector_store, stmt_info, 0,
1015 vect_body);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: aligned.\n");
1020 break;
1023 case dr_unaligned_supported:
1025 /* Here, we assign an additional cost for the unaligned store. */
1026 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1027 unaligned_store, stmt_info,
1028 DR_MISALIGNMENT (dr), vect_body);
1029 if (dump_enabled_p ())
1030 dump_printf_loc (MSG_NOTE, vect_location,
1031 "vect_model_store_cost: unaligned supported by "
1032 "hardware.\n");
1033 break;
1036 case dr_unaligned_unsupported:
1038 *inside_cost = VECT_MAX_COST;
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1042 "vect_model_store_cost: unsupported access.\n");
1043 break;
1046 default:
1047 gcc_unreachable ();
1052 /* Function vect_model_load_cost
1054 Models cost for loads. In the case of grouped accesses, the last access
1055 has the overhead of the grouped access attributed to it. Since unaligned
1056 accesses are supported for loads, we also account for the costs of the
1057 access scheme chosen. */
1059 void
1060 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1061 bool load_lanes_p, slp_tree slp_node,
1062 stmt_vector_for_cost *prologue_cost_vec,
1063 stmt_vector_for_cost *body_cost_vec)
1065 int group_size;
1066 gimple first_stmt;
1067 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1068 unsigned int inside_cost = 0, prologue_cost = 0;
1070 /* The SLP costs were already calculated during SLP tree build. */
1071 if (PURE_SLP_STMT (stmt_info))
1072 return;
1074 /* Grouped accesses? */
1075 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1076 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1078 group_size = vect_cost_group_size (stmt_info);
1079 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1081 /* Not a grouped access. */
1082 else
1084 group_size = 1;
1085 first_dr = dr;
1088 /* We assume that the cost of a single load-lanes instruction is
1089 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1090 access is instead being provided by a load-and-permute operation,
1091 include the cost of the permutes. */
1092 if (!load_lanes_p && group_size > 1)
1094 /* Uses even and odd extract operations for each needed permute. */
1095 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1096 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1097 stmt_info, 0, vect_body);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE, vect_location,
1101 "vect_model_load_cost: strided group_size = %d .\n",
1102 group_size);
1105 /* The loads themselves. */
1106 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1108 /* N scalar loads plus gathering them into a vector. */
1109 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1110 inside_cost += record_stmt_cost (body_cost_vec,
1111 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1112 scalar_load, stmt_info, 0, vect_body);
1113 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1114 stmt_info, 0, vect_body);
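/* E.g. for a 4-element vectype and ncopies == 2 this accounts for
   8 scalar_load stmts plus 2 vec_construct stmts.  */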
1116 else
1117 vect_get_load_cost (first_dr, ncopies,
1118 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1119 || group_size > 1 || slp_node),
1120 &inside_cost, &prologue_cost,
1121 prologue_cost_vec, body_cost_vec, true);
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE, vect_location,
1125 "vect_model_load_cost: inside_cost = %d, "
1126 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1130 /* Calculate cost of DR's memory access. */
1131 void
1132 vect_get_load_cost (struct data_reference *dr, int ncopies,
1133 bool add_realign_cost, unsigned int *inside_cost,
1134 unsigned int *prologue_cost,
1135 stmt_vector_for_cost *prologue_cost_vec,
1136 stmt_vector_for_cost *body_cost_vec,
1137 bool record_prologue_costs)
1139 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1140 gimple stmt = DR_STMT (dr);
1141 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1143 switch (alignment_support_scheme)
1145 case dr_aligned:
1147 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1148 stmt_info, 0, vect_body);
1150 if (dump_enabled_p ())
1151 dump_printf_loc (MSG_NOTE, vect_location,
1152 "vect_model_load_cost: aligned.\n");
1154 break;
1156 case dr_unaligned_supported:
1158 /* Here, we assign an additional cost for the unaligned load. */
1159 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1160 unaligned_load, stmt_info,
1161 DR_MISALIGNMENT (dr), vect_body);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: unaligned supported by "
1166 "hardware.\n");
1168 break;
1170 case dr_explicit_realign:
1172 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1173 vector_load, stmt_info, 0, vect_body);
1174 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1175 vec_perm, stmt_info, 0, vect_body);
1177 /* FIXME: If the misalignment remains fixed across the iterations of
1178 the containing loop, the following cost should be added to the
1179 prologue costs. */
1180 if (targetm.vectorize.builtin_mask_for_load)
1181 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1182 stmt_info, 0, vect_body);
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_NOTE, vect_location,
1186 "vect_model_load_cost: explicit realign\n");
1188 break;
1190 case dr_explicit_realign_optimized:
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE, vect_location,
1194 "vect_model_load_cost: unaligned software "
1195 "pipelined.\n");
1197 /* Unaligned software pipeline has a load of an address, an initial
1198 load, and possibly a mask operation to "prime" the loop. However,
1199 if this is an access in a group of loads, which provide grouped
1200 access, then the above cost should only be considered for one
1201 access in the group. Inside the loop, there is a load op
1202 and a realignment op. */
1204 if (add_realign_cost && record_prologue_costs)
1206 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1207 vector_stmt, stmt_info,
1208 0, vect_prologue);
1209 if (targetm.vectorize.builtin_mask_for_load)
1210 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1211 vector_stmt, stmt_info,
1212 0, vect_prologue);
1215 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1216 stmt_info, 0, vect_body);
1217 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1218 stmt_info, 0, vect_body);
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE, vect_location,
1222 "vect_model_load_cost: explicit realign optimized"
1223 "\n");
1225 break;
1228 case dr_unaligned_unsupported:
1230 *inside_cost = VECT_MAX_COST;
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1234 "vect_model_load_cost: unsupported access.\n");
1235 break;
1238 default:
1239 gcc_unreachable ();
1243 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1244 the loop preheader for the vectorized stmt STMT. */
1246 static void
1247 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1249 if (gsi)
1250 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1251 else
1253 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1254 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1256 if (loop_vinfo)
1258 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1259 basic_block new_bb;
1260 edge pe;
1262 if (nested_in_vect_loop_p (loop, stmt))
1263 loop = loop->inner;
1265 pe = loop_preheader_edge (loop);
1266 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1267 gcc_assert (!new_bb);
1269 else
1271 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1272 basic_block bb;
1273 gimple_stmt_iterator gsi_bb_start;
1275 gcc_assert (bb_vinfo);
1276 bb = BB_VINFO_BB (bb_vinfo);
1277 gsi_bb_start = gsi_after_labels (bb);
1278 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1282 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "created new init_stmt: ");
1286 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1287 dump_printf (MSG_NOTE, "\n");
1291 /* Function vect_init_vector.
1293 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1294 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1295 vector type, a vector with all elements equal to VAL is created first.
1296 Place the initialization at BSI if it is not NULL. Otherwise, place the
1297 initialization at the loop preheader.
1298 Return the DEF of INIT_STMT.
1299 It will be used in the vectorization of STMT. */
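/* A sketch of the common case: for a constant or invariant scalar VAL,

     vect_init_vector (stmt, val, vectype, NULL)

   emits  vect_cst_ = {val, val, ..., val}  in the loop preheader (or,
   for basic-block vectorization, after the labels of the block) and
   returns the new SSA name.  */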
1301 tree
1302 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1304 tree new_var;
1305 gimple init_stmt;
1306 tree vec_oprnd;
1307 tree new_temp;
1309 if (TREE_CODE (type) == VECTOR_TYPE
1310 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1312 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1314 if (CONSTANT_CLASS_P (val))
1315 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1316 else
1318 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1319 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1320 new_temp, val,
1321 NULL_TREE);
1322 vect_init_vector_1 (stmt, init_stmt, gsi);
1323 val = new_temp;
1326 val = build_vector_from_val (type, val);
1329 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1330 init_stmt = gimple_build_assign (new_var, val);
1331 new_temp = make_ssa_name (new_var, init_stmt);
1332 gimple_assign_set_lhs (init_stmt, new_temp);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 vec_oprnd = gimple_assign_lhs (init_stmt);
1335 return vec_oprnd;
1339 /* Function vect_get_vec_def_for_operand.
1341 OP is an operand in STMT. This function returns a (vector) def that will be
1342 used in the vectorized stmt for STMT.
1344 In the case that OP is an SSA_NAME which is defined in the loop, then
1345 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1347 In case OP is an invariant or constant, a new stmt that creates a vector def
1348 needs to be introduced. */
1350 tree
1351 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1353 tree vec_oprnd;
1354 gimple vec_stmt;
1355 gimple def_stmt;
1356 stmt_vec_info def_stmt_info = NULL;
1357 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1358 unsigned int nunits;
1359 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1360 tree def;
1361 enum vect_def_type dt;
1362 bool is_simple_use;
1363 tree vector_type;
1365 if (dump_enabled_p ())
1367 dump_printf_loc (MSG_NOTE, vect_location,
1368 "vect_get_vec_def_for_operand: ");
1369 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1370 dump_printf (MSG_NOTE, "\n");
1373 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1374 &def_stmt, &def, &dt);
1375 gcc_assert (is_simple_use);
1376 if (dump_enabled_p ())
1378 int loc_printed = 0;
1379 if (def)
1381 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1382 loc_printed = 1;
1383 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1384 dump_printf (MSG_NOTE, "\n");
1386 if (def_stmt)
1388 if (loc_printed)
1389 dump_printf (MSG_NOTE, " def_stmt = ");
1390 else
1391 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1392 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1393 dump_printf (MSG_NOTE, "\n");
1397 switch (dt)
1399 /* Case 1: operand is a constant. */
1400 case vect_constant_def:
1402 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1403 gcc_assert (vector_type);
1404 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1406 if (scalar_def)
1407 *scalar_def = op;
1409 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1410 if (dump_enabled_p ())
1411 dump_printf_loc (MSG_NOTE, vect_location,
1412 "Create vector_cst. nunits = %d\n", nunits);
1414 return vect_init_vector (stmt, op, vector_type, NULL);
1417 /* Case 2: operand is defined outside the loop - loop invariant. */
1418 case vect_external_def:
1420 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1421 gcc_assert (vector_type);
1423 if (scalar_def)
1424 *scalar_def = def;
1426 /* Create 'vec_inv = {inv,inv,..,inv}' */
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1430 return vect_init_vector (stmt, def, vector_type, NULL);
1433 /* Case 3: operand is defined inside the loop. */
1434 case vect_internal_def:
1436 if (scalar_def)
1437 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1439 /* Get the def from the vectorized stmt. */
1440 def_stmt_info = vinfo_for_stmt (def_stmt);
1442 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1443 /* Get vectorized pattern statement. */
1444 if (!vec_stmt
1445 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1446 && !STMT_VINFO_RELEVANT (def_stmt_info))
1447 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1448 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1449 gcc_assert (vec_stmt);
1450 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1451 vec_oprnd = PHI_RESULT (vec_stmt);
1452 else if (is_gimple_call (vec_stmt))
1453 vec_oprnd = gimple_call_lhs (vec_stmt);
1454 else
1455 vec_oprnd = gimple_assign_lhs (vec_stmt);
1456 return vec_oprnd;
1459 /* Case 4: operand is defined by a loop header phi - reduction */
1460 case vect_reduction_def:
1461 case vect_double_reduction_def:
1462 case vect_nested_cycle:
1464 struct loop *loop;
1466 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1467 loop = (gimple_bb (def_stmt))->loop_father;
1469 /* Get the def before the loop */
1470 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1471 return get_initial_def_for_reduction (stmt, op, scalar_def);
1474 /* Case 5: operand is defined by loop-header phi - induction. */
1475 case vect_induction_def:
1477 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1479 /* Get the def from the vectorized stmt. */
1480 def_stmt_info = vinfo_for_stmt (def_stmt);
1481 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1482 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1483 vec_oprnd = PHI_RESULT (vec_stmt);
1484 else
1485 vec_oprnd = gimple_get_lhs (vec_stmt);
1486 return vec_oprnd;
1489 default:
1490 gcc_unreachable ();
1495 /* Function vect_get_vec_def_for_stmt_copy
1497 Return a vector-def for an operand. This function is used when the
1498 vectorized stmt to be created (by the caller to this function) is a "copy"
1499 created in case the vectorized result cannot fit in one vector, and several
1500 copies of the vector-stmt are required. In this case the vector-def is
1501 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1502 of the stmt that defines VEC_OPRND.
1503 DT is the type of the vector def VEC_OPRND.
1505 Context:
1506 In case the vectorization factor (VF) is bigger than the number
1507 of elements that can fit in a vectype (nunits), we have to generate
1508 more than one vector stmt to vectorize the scalar stmt. This situation
1509 arises when there are multiple data-types operated upon in the loop; the
1510 smallest data-type determines the VF, and as a result, when vectorizing
1511 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1512 vector stmt (each computing a vector of 'nunits' results, and together
1513 computing 'VF' results in each iteration). This function is called when
1514 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1515 which VF=16 and nunits=4, so the number of copies required is 4):
1517 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1519 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1520 VS1.1: vx.1 = memref1 VS1.2
1521 VS1.2: vx.2 = memref2 VS1.3
1522 VS1.3: vx.3 = memref3
1524 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1525 VSnew.1: vz1 = vx.1 + ... VSnew.2
1526 VSnew.2: vz2 = vx.2 + ... VSnew.3
1527 VSnew.3: vz3 = vx.3 + ...
1529 The vectorization of S1 is explained in vectorizable_load.
1530 The vectorization of S2:
1531 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1532 the function 'vect_get_vec_def_for_operand' is called to
1533 get the relevant vector-def for each operand of S2. For operand x it
1534 returns the vector-def 'vx.0'.
1536 To create the remaining copies of the vector-stmt (VSnew.j), this
1537 function is called to get the relevant vector-def for each operand. It is
1538 obtained from the respective VS1.j stmt, which is recorded in the
1539 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1541 For example, to obtain the vector-def 'vx.1' in order to create the
1542 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1543 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1544 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1545 and return its def ('vx.1').
1546 Overall, to create the above sequence this function will be called 3 times:
1547 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1548 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1549 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1551 tree
1552 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1554 gimple vec_stmt_for_operand;
1555 stmt_vec_info def_stmt_info;
1557 /* Do nothing; can reuse same def. */
1558 if (dt == vect_external_def || dt == vect_constant_def )
1559 return vec_oprnd;
1561 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1562 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1563 gcc_assert (def_stmt_info);
1564 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1565 gcc_assert (vec_stmt_for_operand);
1566 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1567 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1568 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1569 else
1570 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1571 return vec_oprnd;
1575 /* Get vectorized definitions for the operands to create a copy of an original
1576 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1578 static void
1579 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1580 vec<tree> *vec_oprnds0,
1581 vec<tree> *vec_oprnds1)
1583 tree vec_oprnd = vec_oprnds0->pop ();
1585 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1586 vec_oprnds0->quick_push (vec_oprnd);
1588 if (vec_oprnds1 && vec_oprnds1->length ())
1590 vec_oprnd = vec_oprnds1->pop ();
1591 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1592 vec_oprnds1->quick_push (vec_oprnd);
1597 /* Get vectorized definitions for OP0 and OP1.
1598 REDUC_INDEX is the index of reduction operand in case of reduction,
1599 and -1 otherwise. */
1601 void
1602 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1603 vec<tree> *vec_oprnds0,
1604 vec<tree> *vec_oprnds1,
1605 slp_tree slp_node, int reduc_index)
1607 if (slp_node)
1609 int nops = (op1 == NULL_TREE) ? 1 : 2;
1610 auto_vec<tree> ops (nops);
1611 auto_vec<vec<tree> > vec_defs (nops);
1613 ops.quick_push (op0);
1614 if (op1)
1615 ops.quick_push (op1);
1617 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1619 *vec_oprnds0 = vec_defs[0];
1620 if (op1)
1621 *vec_oprnds1 = vec_defs[1];
1623 else
1625 tree vec_oprnd;
1627 vec_oprnds0->create (1);
1628 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1629 vec_oprnds0->quick_push (vec_oprnd);
1631 if (op1)
1633 vec_oprnds1->create (1);
1634 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1635 vec_oprnds1->quick_push (vec_oprnd);
1641 /* Function vect_finish_stmt_generation.
1643 Insert a new stmt. */
1645 void
1646 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1647 gimple_stmt_iterator *gsi)
1649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1653 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1655 if (!gsi_end_p (*gsi)
1656 && gimple_has_mem_ops (vec_stmt))
1658 gimple at_stmt = gsi_stmt (*gsi);
1659 tree vuse = gimple_vuse (at_stmt);
1660 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1662 tree vdef = gimple_vdef (at_stmt);
1663 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 /* If we have an SSA vuse and insert a store, update virtual
1665 SSA form to avoid triggering the renamer. Do so only
1666 if we can easily see all uses - which is what almost always
1667 happens with the way vectorized stmts are inserted. */
1668 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 && ((is_gimple_assign (vec_stmt)
1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 || (is_gimple_call (vec_stmt)
1672 && !(gimple_call_flags (vec_stmt)
1673 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1675 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 gimple_set_vdef (vec_stmt, new_vdef);
1677 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1681 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1683 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1684 bb_vinfo));
1686 if (dump_enabled_p ())
1688 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1690 dump_printf (MSG_NOTE, "\n");
1693 gimple_set_location (vec_stmt, gimple_location (stmt));
1695 /* While EH edges will generally prevent vectorization, stmt might
1696 e.g. be in a must-not-throw region. Ensure newly created stmts
1697 that could throw are part of the same region. */
1698 int lp_nr = lookup_stmt_eh_lp (stmt);
1699 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1700 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1703 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1704 a function declaration if the target has a vectorized version
1705 of the function, or NULL_TREE if the function cannot be vectorized. */
1707 tree
1708 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1710 tree fndecl = gimple_call_fndecl (call);
1712 /* We only handle functions that do not read or clobber memory -- i.e.
1713 const or novops ones. */
1714 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1715 return NULL_TREE;
1717 if (!fndecl
1718 || TREE_CODE (fndecl) != FUNCTION_DECL
1719 || !DECL_BUILT_IN (fndecl))
1720 return NULL_TREE;
1722 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1723 vectype_in);
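/* For example, a call to a const math builtin such as sqrtf may be
   replaced by the vector variant the target advertises through
   targetm.vectorize.builtin_vectorized_function, when one exists for
   the given vector types; otherwise NULL_TREE is returned and the call
   is not vectorized this way.  */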
1727 static tree permute_vec_elements (tree, tree, tree, gimple,
1728 gimple_stmt_iterator *);
1731 /* Function vectorizable_mask_load_store.
1733 Check if STMT performs a conditional load or store that can be vectorized.
1734 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1735 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1736 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1738 static bool
1739 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1740 gimple *vec_stmt, slp_tree slp_node)
1742 tree vec_dest = NULL;
1743 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1744 stmt_vec_info prev_stmt_info;
1745 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1746 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1747 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1748 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1749 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1750 tree elem_type;
1751 gimple new_stmt;
1752 tree dummy;
1753 tree dataref_ptr = NULL_TREE;
1754 gimple ptr_incr;
1755 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1756 int ncopies;
1757 int i, j;
1758 bool inv_p;
1759 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1760 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1761 int gather_scale = 1;
1762 enum vect_def_type gather_dt = vect_unknown_def_type;
1763 bool is_store;
1764 tree mask;
1765 gimple def_stmt;
1766 tree def;
1767 enum vect_def_type dt;
1769 if (slp_node != NULL)
1770 return false;
1772 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1773 gcc_assert (ncopies >= 1);
1775 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1776 mask = gimple_call_arg (stmt, 2);
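/* The scalar mask must be as wide as a vector element; e.g. for a
   vector of 32-bit elements the mask's scalar type needs 32-bit
   precision, which is what the check below enforces.  */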
1777 if (TYPE_PRECISION (TREE_TYPE (mask))
1778 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1779 return false;
1781 /* FORNOW. This restriction should be relaxed. */
1782 if (nested_in_vect_loop && ncopies > 1)
1784 if (dump_enabled_p ())
1785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1786 "multiple types in nested loop.");
1787 return false;
1790 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1791 return false;
1793 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1794 return false;
1796 if (!STMT_VINFO_DATA_REF (stmt_info))
1797 return false;
1799 elem_type = TREE_TYPE (vectype);
1801 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1802 return false;
1804 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1805 return false;
1807 if (STMT_VINFO_GATHER_P (stmt_info))
1809 gimple def_stmt;
1810 tree def;
1811 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1812 &gather_off, &gather_scale);
1813 gcc_assert (gather_decl);
1814 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1815 &def_stmt, &def, &gather_dt,
1816 &gather_off_vectype))
1818 if (dump_enabled_p ())
1819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1820 "gather index use not simple.");
1821 return false;
1824 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1825 tree masktype
1826 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1827 if (TREE_CODE (masktype) == INTEGER_TYPE)
1829 if (dump_enabled_p ())
1830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1831 "masked gather with integer mask not supported.");
1832 return false;
1835 else if (tree_int_cst_compare (nested_in_vect_loop
1836 ? STMT_VINFO_DR_STEP (stmt_info)
1837 : DR_STEP (dr), size_zero_node) <= 0)
1838 return false;
1839 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1840 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1841 return false;
1843 if (TREE_CODE (mask) != SSA_NAME)
1844 return false;
1846 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1847 &def_stmt, &def, &dt))
1848 return false;
1850 if (is_store)
1852 tree rhs = gimple_call_arg (stmt, 3);
1853 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1854 &def_stmt, &def, &dt))
1855 return false;
1858 if (!vec_stmt) /* transformation not required. */
1860 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1861 if (is_store)
1862 vect_model_store_cost (stmt_info, ncopies, false, dt,
1863 NULL, NULL, NULL);
1864 else
1865 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1866 return true;
1869 /** Transform. **/
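/* An illustrative sketch of the three branches below (not a transcript of
   any particular dump): a masked gather calls the target builtin
   GATHER_DECL with the mask used both as the source and the mask operand;
   a masked store is emitted as an IFN_MASK_STORE internal call on the
   vectorized rhs and mask; a masked load is emitted as an IFN_MASK_LOAD
   internal call whose lhs is a fresh vector SSA name.  The scalar
   MASK_LOAD/MASK_STORE calls themselves are typically created earlier by
   if-conversion from conditional accesses such as
   "if (c[i]) a[i] = b[i];".  */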
1871 if (STMT_VINFO_GATHER_P (stmt_info))
1873 tree vec_oprnd0 = NULL_TREE, op;
1874 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1875 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1876 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1877 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1878 tree mask_perm_mask = NULL_TREE;
1879 edge pe = loop_preheader_edge (loop);
1880 gimple_seq seq;
1881 basic_block new_bb;
1882 enum { NARROW, NONE, WIDEN } modifier;
1883 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1885 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1886 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1887 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1888 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1889 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1890 scaletype = TREE_VALUE (arglist);
1891 gcc_checking_assert (types_compatible_p (srctype, rettype)
1892 && types_compatible_p (srctype, masktype));
1894 if (nunits == gather_off_nunits)
1895 modifier = NONE;
1896 else if (nunits == gather_off_nunits / 2)
1898 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1899 modifier = WIDEN;
1901 for (i = 0; i < gather_off_nunits; ++i)
1902 sel[i] = i | nunits;
1904 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1905 gcc_assert (perm_mask != NULL_TREE);
1907 else if (nunits == gather_off_nunits * 2)
1909 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1910 modifier = NARROW;
1912 for (i = 0; i < nunits; ++i)
1913 sel[i] = i < gather_off_nunits
1914 ? i : i + nunits - gather_off_nunits;
1916 perm_mask = vect_gen_perm_mask (vectype, sel);
1917 gcc_assert (perm_mask != NULL_TREE);
1918 ncopies *= 2;
1919 for (i = 0; i < nunits; ++i)
1920 sel[i] = i | gather_off_nunits;
1921 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1922 gcc_assert (mask_perm_mask != NULL_TREE);
1924 else
1925 gcc_unreachable ();
1927 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1929 ptr = fold_convert (ptrtype, gather_base);
1930 if (!is_gimple_min_invariant (ptr))
1932 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1933 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1934 gcc_assert (!new_bb);
1937 scale = build_int_cst (scaletype, gather_scale);
1939 prev_stmt_info = NULL;
1940 for (j = 0; j < ncopies; ++j)
1942 if (modifier == WIDEN && (j & 1))
1943 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1944 perm_mask, stmt, gsi);
1945 else if (j == 0)
1946 op = vec_oprnd0
1947 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1948 else
1949 op = vec_oprnd0
1950 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1952 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1954 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1955 == TYPE_VECTOR_SUBPARTS (idxtype));
1956 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1957 var = make_ssa_name (var, NULL);
1958 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1959 new_stmt
1960 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1961 op, NULL_TREE);
1962 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1963 op = var;
1966 if (mask_perm_mask && (j & 1))
1967 mask_op = permute_vec_elements (mask_op, mask_op,
1968 mask_perm_mask, stmt, gsi);
1969 else
1971 if (j == 0)
1972 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1973 else
1975 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1976 &def_stmt, &def, &dt);
1977 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1980 mask_op = vec_mask;
1981 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1983 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1984 == TYPE_VECTOR_SUBPARTS (masktype));
1985 var = vect_get_new_vect_var (masktype, vect_simple_var,
1986 NULL);
1987 var = make_ssa_name (var, NULL);
1988 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1989 new_stmt
1990 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1991 mask_op, NULL_TREE);
1992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1993 mask_op = var;
1997 new_stmt
1998 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1999 scale);
2001 if (!useless_type_conversion_p (vectype, rettype))
2003 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2004 == TYPE_VECTOR_SUBPARTS (rettype));
2005 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2006 op = make_ssa_name (var, new_stmt);
2007 gimple_call_set_lhs (new_stmt, op);
2008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2009 var = make_ssa_name (vec_dest, NULL);
2010 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2011 new_stmt
2012 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2013 NULL_TREE);
2015 else
2017 var = make_ssa_name (vec_dest, new_stmt);
2018 gimple_call_set_lhs (new_stmt, var);
2021 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2023 if (modifier == NARROW)
2025 if ((j & 1) == 0)
2027 prev_res = var;
2028 continue;
2030 var = permute_vec_elements (prev_res, var,
2031 perm_mask, stmt, gsi);
2032 new_stmt = SSA_NAME_DEF_STMT (var);
2035 if (prev_stmt_info == NULL)
2036 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2037 else
2038 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2039 prev_stmt_info = vinfo_for_stmt (new_stmt);
2041 return true;
2043 else if (is_store)
2045 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2046 prev_stmt_info = NULL;
2047 for (i = 0; i < ncopies; i++)
2049 unsigned align, misalign;
2051 if (i == 0)
2053 tree rhs = gimple_call_arg (stmt, 3);
2054 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2055 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2056 /* We should have caught mismatched types earlier. */
2057 gcc_assert (useless_type_conversion_p (vectype,
2058 TREE_TYPE (vec_rhs)));
2059 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2060 NULL_TREE, &dummy, gsi,
2061 &ptr_incr, false, &inv_p);
2062 gcc_assert (!inv_p);
2064 else
2066 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2067 &def, &dt);
2068 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2069 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2070 &def, &dt);
2071 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2072 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2073 TYPE_SIZE_UNIT (vectype));
2076 align = TYPE_ALIGN_UNIT (vectype);
2077 if (aligned_access_p (dr))
2078 misalign = 0;
2079 else if (DR_MISALIGNMENT (dr) == -1)
2081 align = TYPE_ALIGN_UNIT (elem_type);
2082 misalign = 0;
2084 else
2085 misalign = DR_MISALIGNMENT (dr);
2086 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2087 misalign);
2088 new_stmt
2089 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2090 gimple_call_arg (stmt, 1),
2091 vec_mask, vec_rhs);
2092 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2093 if (i == 0)
2094 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2095 else
2096 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2097 prev_stmt_info = vinfo_for_stmt (new_stmt);
2100 else
2102 tree vec_mask = NULL_TREE;
2103 prev_stmt_info = NULL;
2104 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2105 for (i = 0; i < ncopies; i++)
2107 unsigned align, misalign;
2109 if (i == 0)
2111 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2112 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2113 NULL_TREE, &dummy, gsi,
2114 &ptr_incr, false, &inv_p);
2115 gcc_assert (!inv_p);
2117 else
2119 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2120 &def, &dt);
2121 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2122 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2123 TYPE_SIZE_UNIT (vectype));
2126 align = TYPE_ALIGN_UNIT (vectype);
2127 if (aligned_access_p (dr))
2128 misalign = 0;
2129 else if (DR_MISALIGNMENT (dr) == -1)
2131 align = TYPE_ALIGN_UNIT (elem_type);
2132 misalign = 0;
2134 else
2135 misalign = DR_MISALIGNMENT (dr);
2136 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2137 misalign);
2138 new_stmt
2139 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2140 gimple_call_arg (stmt, 1),
2141 vec_mask);
2142 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2144 if (i == 0)
2145 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2146 else
2147 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2148 prev_stmt_info = vinfo_for_stmt (new_stmt);
2152 return true;
2156 /* Function vectorizable_call.
2158 Check if STMT performs a function call that can be vectorized.
2159 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2160 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2161 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
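/* A minimal sketch of the kind of statement handled here: for a loop body
   containing

     a[i] = sqrtf (b[i]);

   vectorizable_function below asks the target (via
   targetm.vectorize.builtin_vectorized_function) for a vector variant of
   sqrtf; if one exists, the scalar call is replaced by NCOPIES calls to
   that builtin, each consuming and producing whole vectors.  */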
2163 static bool
2164 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2165 slp_tree slp_node)
2167 tree vec_dest;
2168 tree scalar_dest;
2169 tree op, type;
2170 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2172 tree vectype_out, vectype_in;
2173 int nunits_in;
2174 int nunits_out;
2175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2176 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2177 tree fndecl, new_temp, def, rhs_type;
2178 gimple def_stmt;
2179 enum vect_def_type dt[3]
2180 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2181 gimple new_stmt = NULL;
2182 int ncopies, j;
2183 vec<tree> vargs = vNULL;
2184 enum { NARROW, NONE, WIDEN } modifier;
2185 size_t i, nargs;
2186 tree lhs;
2188 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2189 return false;
2191 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2192 return false;
2194 /* Is STMT a vectorizable call? */
2195 if (!is_gimple_call (stmt))
2196 return false;
2198 if (gimple_call_internal_p (stmt)
2199 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2200 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2201 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2202 slp_node);
2204 if (gimple_call_lhs (stmt) == NULL_TREE
2205 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2206 return false;
2208 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2210 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2212 /* Process function arguments. */
2213 rhs_type = NULL_TREE;
2214 vectype_in = NULL_TREE;
2215 nargs = gimple_call_num_args (stmt);
2217 /* Bail out if the function has more than three arguments; we do not have
2218 interesting builtin functions to vectorize with more than two arguments
2219 except for fma. Calls with no arguments are not handled either. */
2220 if (nargs == 0 || nargs > 3)
2221 return false;
2223 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2224 if (gimple_call_internal_p (stmt)
2225 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2227 nargs = 0;
2228 rhs_type = unsigned_type_node;
2231 for (i = 0; i < nargs; i++)
2233 tree opvectype;
2235 op = gimple_call_arg (stmt, i);
2237 /* We can only handle calls with arguments of the same type. */
2238 if (rhs_type
2239 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "argument types differ.\n");
2244 return false;
2246 if (!rhs_type)
2247 rhs_type = TREE_TYPE (op);
2249 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2250 &def_stmt, &def, &dt[i], &opvectype))
2252 if (dump_enabled_p ())
2253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 "use not simple.\n");
2255 return false;
2258 if (!vectype_in)
2259 vectype_in = opvectype;
2260 else if (opvectype
2261 && opvectype != vectype_in)
2263 if (dump_enabled_p ())
2264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2265 "argument vector types differ.\n");
2266 return false;
2269 /* If all arguments are external or constant defs use a vector type with
2270 the same size as the output vector type. */
2271 if (!vectype_in)
2272 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2273 if (vec_stmt)
2274 gcc_assert (vectype_in);
2275 if (!vectype_in)
2277 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2280 "no vectype for scalar type ");
2281 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2282 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2285 return false;
2288 /* FORNOW */
2289 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2290 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2291 if (nunits_in == nunits_out / 2)
2292 modifier = NARROW;
2293 else if (nunits_out == nunits_in)
2294 modifier = NONE;
2295 else if (nunits_out == nunits_in / 2)
2296 modifier = WIDEN;
2297 else
2298 return false;
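/* E.g. with 128-bit vectors, a call whose arguments and result have the
   same element width (nunits_out == nunits_in) uses modifier NONE; a call
   such as lroundf, whose 64-bit result has half as many elements per
   vector as its float argument, would be the WIDEN case, which no target
   currently supports here.  (Sketch, assuming equally sized vector
   types.)  */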
2300 /* For now, we only vectorize functions if a target specific builtin
2301 is available. TODO -- in some cases, it might be profitable to
2302 insert the calls for pieces of the vector, in order to be able
2303 to vectorize other operations in the loop. */
2304 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2305 if (fndecl == NULL_TREE)
2307 if (gimple_call_internal_p (stmt)
2308 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2309 && !slp_node
2310 && loop_vinfo
2311 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2312 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2313 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2314 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2316 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2317 { 0, 1, 2, ... vf - 1 } vector. */
2318 gcc_assert (nargs == 0);
2320 else
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "function is not vectorizable.\n");
2325 return false;
2329 gcc_assert (!gimple_vuse (stmt));
2331 if (slp_node || PURE_SLP_STMT (stmt_info))
2332 ncopies = 1;
2333 else if (modifier == NARROW)
2334 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2335 else
2336 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2338 /* Sanity check: make sure that at least one copy of the vectorized stmt
2339 needs to be generated. */
2340 gcc_assert (ncopies >= 1);
2342 if (!vec_stmt) /* transformation not required. */
2344 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2347 "\n");
2348 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2349 return true;
2352 /** Transform. **/
2354 if (dump_enabled_p ())
2355 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2357 /* Handle def. */
2358 scalar_dest = gimple_call_lhs (stmt);
2359 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2361 prev_stmt_info = NULL;
2362 switch (modifier)
2364 case NONE:
2365 for (j = 0; j < ncopies; ++j)
2367 /* Build argument list for the vectorized call. */
2368 if (j == 0)
2369 vargs.create (nargs);
2370 else
2371 vargs.truncate (0);
2373 if (slp_node)
2375 auto_vec<vec<tree> > vec_defs (nargs);
2376 vec<tree> vec_oprnds0;
2378 for (i = 0; i < nargs; i++)
2379 vargs.quick_push (gimple_call_arg (stmt, i));
2380 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2381 vec_oprnds0 = vec_defs[0];
2383 /* Arguments are ready. Create the new vector stmt. */
2384 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2386 size_t k;
2387 for (k = 0; k < nargs; k++)
2389 vec<tree> vec_oprndsk = vec_defs[k];
2390 vargs[k] = vec_oprndsk[i];
2392 new_stmt = gimple_build_call_vec (fndecl, vargs);
2393 new_temp = make_ssa_name (vec_dest, new_stmt);
2394 gimple_call_set_lhs (new_stmt, new_temp);
2395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2396 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2399 for (i = 0; i < nargs; i++)
2401 vec<tree> vec_oprndsi = vec_defs[i];
2402 vec_oprndsi.release ();
2404 continue;
2407 for (i = 0; i < nargs; i++)
2409 op = gimple_call_arg (stmt, i);
2410 if (j == 0)
2411 vec_oprnd0
2412 = vect_get_vec_def_for_operand (op, stmt, NULL);
2413 else
2415 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2416 vec_oprnd0
2417 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2420 vargs.quick_push (vec_oprnd0);
2423 if (gimple_call_internal_p (stmt)
2424 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2426 tree *v = XALLOCAVEC (tree, nunits_out);
2427 int k;
2428 for (k = 0; k < nunits_out; ++k)
2429 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2430 tree cst = build_vector (vectype_out, v);
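	  /* E.g. with nunits_out == 4, copy j == 1 builds the constant
	     vector { 4, 5, 6, 7 }; across all copies the lanes enumerate
	     0 .. vf - 1 as described above.  */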
2431 tree new_var
2432 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2433 gimple init_stmt = gimple_build_assign (new_var, cst);
2434 new_temp = make_ssa_name (new_var, init_stmt);
2435 gimple_assign_set_lhs (init_stmt, new_temp);
2436 vect_init_vector_1 (stmt, init_stmt, NULL);
2437 new_temp = make_ssa_name (vec_dest, NULL);
2438 new_stmt = gimple_build_assign (new_temp,
2439 gimple_assign_lhs (init_stmt));
2441 else
2443 new_stmt = gimple_build_call_vec (fndecl, vargs);
2444 new_temp = make_ssa_name (vec_dest, new_stmt);
2445 gimple_call_set_lhs (new_stmt, new_temp);
2447 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2449 if (j == 0)
2450 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2451 else
2452 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2454 prev_stmt_info = vinfo_for_stmt (new_stmt);
2457 break;
2459 case NARROW:
2460 for (j = 0; j < ncopies; ++j)
2462 /* Build argument list for the vectorized call. */
2463 if (j == 0)
2464 vargs.create (nargs * 2);
2465 else
2466 vargs.truncate (0);
2468 if (slp_node)
2470 auto_vec<vec<tree> > vec_defs (nargs);
2471 vec<tree> vec_oprnds0;
2473 for (i = 0; i < nargs; i++)
2474 vargs.quick_push (gimple_call_arg (stmt, i));
2475 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2476 vec_oprnds0 = vec_defs[0];
2478 /* Arguments are ready. Create the new vector stmt. */
2479 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2481 size_t k;
2482 vargs.truncate (0);
2483 for (k = 0; k < nargs; k++)
2485 vec<tree> vec_oprndsk = vec_defs[k];
2486 vargs.quick_push (vec_oprndsk[i]);
2487 vargs.quick_push (vec_oprndsk[i + 1]);
2489 new_stmt = gimple_build_call_vec (fndecl, vargs);
2490 new_temp = make_ssa_name (vec_dest, new_stmt);
2491 gimple_call_set_lhs (new_stmt, new_temp);
2492 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2493 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2496 for (i = 0; i < nargs; i++)
2498 vec<tree> vec_oprndsi = vec_defs[i];
2499 vec_oprndsi.release ();
2501 continue;
2504 for (i = 0; i < nargs; i++)
2506 op = gimple_call_arg (stmt, i);
2507 if (j == 0)
2509 vec_oprnd0
2510 = vect_get_vec_def_for_operand (op, stmt, NULL);
2511 vec_oprnd1
2512 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2514 else
2516 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2517 vec_oprnd0
2518 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2519 vec_oprnd1
2520 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2523 vargs.quick_push (vec_oprnd0);
2524 vargs.quick_push (vec_oprnd1);
2527 new_stmt = gimple_build_call_vec (fndecl, vargs);
2528 new_temp = make_ssa_name (vec_dest, new_stmt);
2529 gimple_call_set_lhs (new_stmt, new_temp);
2530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2532 if (j == 0)
2533 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2534 else
2535 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2537 prev_stmt_info = vinfo_for_stmt (new_stmt);
2540 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2542 break;
2544 case WIDEN:
2545 /* No current target implements this case. */
2546 return false;
2549 vargs.release ();
2551 /* The call in STMT might prevent it from being removed in dce.
2552 We however cannot remove it here, due to the way the ssa name
2553 it defines is mapped to the new definition. So just replace
2554 rhs of the statement with something harmless. */
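  /* For instance, a scalar call "x_5 = foo (a_3);" is rewritten below into
     "x_5 = 0;" (a zero constant of the lhs type), which keeps the SSA name
     x_5 around for the mapping while making the statement trivially dead.
     (Hypothetical names, for illustration only.)  */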
2556 if (slp_node)
2557 return true;
2559 type = TREE_TYPE (scalar_dest);
2560 if (is_pattern_stmt_p (stmt_info))
2561 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2562 else
2563 lhs = gimple_call_lhs (stmt);
2564 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2565 set_vinfo_for_stmt (new_stmt, stmt_info);
2566 set_vinfo_for_stmt (stmt, NULL);
2567 STMT_VINFO_STMT (stmt_info) = new_stmt;
2568 gsi_replace (gsi, new_stmt, false);
2570 return true;
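/* Per-argument information collected by vectorizable_simd_clone_call: the
   vector type and def-type of the argument, its value or IV base in OP,
   the linear step if the argument is a simple induction in the loop, and
   the known alignment in bytes for invariant pointer arguments.  */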
2574 struct simd_call_arg_info
2576 tree vectype;
2577 tree op;
2578 enum vect_def_type dt;
2579 HOST_WIDE_INT linear_step;
2580 unsigned int align;
2583 /* Function vectorizable_simd_clone_call.
2585 Check if STMT performs a function call that can be vectorized
2586 by calling a simd clone of the function.
2587 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2588 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2589 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
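/* A brief illustration: given

     #pragma omp declare simd
     float foo (float x);

   the compiler creates simd clones of foo, and a loop statement such as
   "y[i] = foo (x[i]);" can be vectorized by calling the best matching
   clone with whole vectors (or pieces of vectors) as arguments instead of
   scalars.  */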
2591 static bool
2592 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2593 gimple *vec_stmt, slp_tree slp_node)
2595 tree vec_dest;
2596 tree scalar_dest;
2597 tree op, type;
2598 tree vec_oprnd0 = NULL_TREE;
2599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2600 tree vectype;
2601 unsigned int nunits;
2602 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2603 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2604 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2605 tree fndecl, new_temp, def;
2606 gimple def_stmt;
2607 gimple new_stmt = NULL;
2608 int ncopies, j;
2609 vec<simd_call_arg_info> arginfo = vNULL;
2610 vec<tree> vargs = vNULL;
2611 size_t i, nargs;
2612 tree lhs, rtype, ratype;
2613 vec<constructor_elt, va_gc> *ret_ctor_elts;
2615 /* Is STMT a vectorizable call? */
2616 if (!is_gimple_call (stmt))
2617 return false;
2619 fndecl = gimple_call_fndecl (stmt);
2620 if (fndecl == NULL_TREE)
2621 return false;
2623 struct cgraph_node *node = cgraph_get_node (fndecl);
2624 if (node == NULL || node->simd_clones == NULL)
2625 return false;
2627 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2628 return false;
2630 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2631 return false;
2633 if (gimple_call_lhs (stmt)
2634 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2635 return false;
2637 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2639 vectype = STMT_VINFO_VECTYPE (stmt_info);
2641 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2642 return false;
2644 /* FORNOW */
2645 if (slp_node || PURE_SLP_STMT (stmt_info))
2646 return false;
2648 /* Process function arguments. */
2649 nargs = gimple_call_num_args (stmt);
2651 /* Bail out if the function has zero arguments. */
2652 if (nargs == 0)
2653 return false;
2655 arginfo.create (nargs);
2657 for (i = 0; i < nargs; i++)
2659 simd_call_arg_info thisarginfo;
2660 affine_iv iv;
2662 thisarginfo.linear_step = 0;
2663 thisarginfo.align = 0;
2664 thisarginfo.op = NULL_TREE;
2666 op = gimple_call_arg (stmt, i);
2667 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2668 &def_stmt, &def, &thisarginfo.dt,
2669 &thisarginfo.vectype)
2670 || thisarginfo.dt == vect_uninitialized_def)
2672 if (dump_enabled_p ())
2673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2674 "use not simple.\n");
2675 arginfo.release ();
2676 return false;
2679 if (thisarginfo.dt == vect_constant_def
2680 || thisarginfo.dt == vect_external_def)
2681 gcc_assert (thisarginfo.vectype == NULL_TREE);
2682 else
2683 gcc_assert (thisarginfo.vectype != NULL_TREE);
2685 if (thisarginfo.dt != vect_constant_def
2686 && thisarginfo.dt != vect_external_def
2687 && loop_vinfo
2688 && TREE_CODE (op) == SSA_NAME
2689 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2690 && tree_fits_shwi_p (iv.step))
2692 thisarginfo.linear_step = tree_to_shwi (iv.step);
2693 thisarginfo.op = iv.base;
2695 else if ((thisarginfo.dt == vect_constant_def
2696 || thisarginfo.dt == vect_external_def)
2697 && POINTER_TYPE_P (TREE_TYPE (op)))
2698 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2700 arginfo.quick_push (thisarginfo);
2703 unsigned int badness = 0;
2704 struct cgraph_node *bestn = NULL;
2705 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2706 bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2707 else
2708 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2709 n = n->simdclone->next_clone)
2711 unsigned int this_badness = 0;
2712 if (n->simdclone->simdlen
2713 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2714 || n->simdclone->nargs != nargs)
2715 continue;
2716 if (n->simdclone->simdlen
2717 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2718 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2719 - exact_log2 (n->simdclone->simdlen)) * 1024;
2720 if (n->simdclone->inbranch)
2721 this_badness += 2048;
2722 int target_badness = targetm.simd_clone.usable (n);
2723 if (target_badness < 0)
2724 continue;
2725 this_badness += target_badness * 512;
2726 /* FORNOW: Have to add code to add the mask argument. */
2727 if (n->simdclone->inbranch)
2728 continue;
2729 for (i = 0; i < nargs; i++)
2731 switch (n->simdclone->args[i].arg_type)
2733 case SIMD_CLONE_ARG_TYPE_VECTOR:
2734 if (!useless_type_conversion_p
2735 (n->simdclone->args[i].orig_type,
2736 TREE_TYPE (gimple_call_arg (stmt, i))))
2737 i = -1;
2738 else if (arginfo[i].dt == vect_constant_def
2739 || arginfo[i].dt == vect_external_def
2740 || arginfo[i].linear_step)
2741 this_badness += 64;
2742 break;
2743 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2744 if (arginfo[i].dt != vect_constant_def
2745 && arginfo[i].dt != vect_external_def)
2746 i = -1;
2747 break;
2748 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2749 if (arginfo[i].dt == vect_constant_def
2750 || arginfo[i].dt == vect_external_def
2751 || (arginfo[i].linear_step
2752 != n->simdclone->args[i].linear_step))
2753 i = -1;
2754 break;
2755 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2756 /* FORNOW */
2757 i = -1;
2758 break;
2759 case SIMD_CLONE_ARG_TYPE_MASK:
2760 gcc_unreachable ();
2762 if (i == (size_t) -1)
2763 break;
2764 if (n->simdclone->args[i].alignment > arginfo[i].align)
2766 i = -1;
2767 break;
2769 if (arginfo[i].align)
2770 this_badness += (exact_log2 (arginfo[i].align)
2771 - exact_log2 (n->simdclone->args[i].alignment));
2773 if (i == (size_t) -1)
2774 continue;
2775 if (bestn == NULL || this_badness < badness)
2777 bestn = n;
2778 badness = this_badness;
2782 if (bestn == NULL)
2784 arginfo.release ();
2785 return false;
2788 for (i = 0; i < nargs; i++)
2789 if ((arginfo[i].dt == vect_constant_def
2790 || arginfo[i].dt == vect_external_def)
2791 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2793 arginfo[i].vectype
2794 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2795 i)));
2796 if (arginfo[i].vectype == NULL
2797 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2798 > bestn->simdclone->simdlen))
2800 arginfo.release ();
2801 return false;
2805 fndecl = bestn->decl;
2806 nunits = bestn->simdclone->simdlen;
2807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2809 /* If the function isn't const, only allow it in simd loops where the user
2810 has asserted that at least nunits consecutive iterations can be
2811 performed using SIMD instructions. */
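  /* E.g. a loop annotated with "#pragma omp simd safelen(8)" records
     loop->safelen == 8, so a non-const clone with simdlen of at most 8
     remains usable here.  */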
2812 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2813 && gimple_vuse (stmt))
2815 arginfo.release ();
2816 return false;
2819 /* Sanity check: make sure that at least one copy of the vectorized stmt
2820 needs to be generated. */
2821 gcc_assert (ncopies >= 1);
2823 if (!vec_stmt) /* transformation not required. */
2825 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2826 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2827 if (dump_enabled_p ())
2828 dump_printf_loc (MSG_NOTE, vect_location,
2829 "=== vectorizable_simd_clone_call ===\n");
2830 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2831 arginfo.release ();
2832 return true;
2835 /** Transform. **/
2837 if (dump_enabled_p ())
2838 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2840 /* Handle def. */
2841 scalar_dest = gimple_call_lhs (stmt);
2842 vec_dest = NULL_TREE;
2843 rtype = NULL_TREE;
2844 ratype = NULL_TREE;
2845 if (scalar_dest)
2847 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2848 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2849 if (TREE_CODE (rtype) == ARRAY_TYPE)
2851 ratype = rtype;
2852 rtype = TREE_TYPE (ratype);
2856 prev_stmt_info = NULL;
2857 for (j = 0; j < ncopies; ++j)
2859 /* Build argument list for the vectorized call. */
2860 if (j == 0)
2861 vargs.create (nargs);
2862 else
2863 vargs.truncate (0);
2865 for (i = 0; i < nargs; i++)
2867 unsigned int k, l, m, o;
2868 tree atype;
2869 op = gimple_call_arg (stmt, i);
2870 switch (bestn->simdclone->args[i].arg_type)
2872 case SIMD_CLONE_ARG_TYPE_VECTOR:
2873 atype = bestn->simdclone->args[i].vector_type;
2874 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2875 for (m = j * o; m < (j + 1) * o; m++)
2877 if (TYPE_VECTOR_SUBPARTS (atype)
2878 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2880 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2881 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2882 / TYPE_VECTOR_SUBPARTS (atype));
2883 gcc_assert ((k & (k - 1)) == 0);
2884 if (m == 0)
2885 vec_oprnd0
2886 = vect_get_vec_def_for_operand (op, stmt, NULL);
2887 else
2889 vec_oprnd0 = arginfo[i].op;
2890 if ((m & (k - 1)) == 0)
2891 vec_oprnd0
2892 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2893 vec_oprnd0);
2895 arginfo[i].op = vec_oprnd0;
2896 vec_oprnd0
2897 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2898 size_int (prec),
2899 bitsize_int ((m & (k - 1)) * prec));
2900 new_stmt
2901 = gimple_build_assign (make_ssa_name (atype, NULL),
2902 vec_oprnd0);
2903 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2904 vargs.safe_push (gimple_assign_lhs (new_stmt));
2906 else
2908 k = (TYPE_VECTOR_SUBPARTS (atype)
2909 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2910 gcc_assert ((k & (k - 1)) == 0);
2911 vec<constructor_elt, va_gc> *ctor_elts;
2912 if (k != 1)
2913 vec_alloc (ctor_elts, k);
2914 else
2915 ctor_elts = NULL;
2916 for (l = 0; l < k; l++)
2918 if (m == 0 && l == 0)
2919 vec_oprnd0
2920 = vect_get_vec_def_for_operand (op, stmt, NULL);
2921 else
2922 vec_oprnd0
2923 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2924 arginfo[i].op);
2925 arginfo[i].op = vec_oprnd0;
2926 if (k == 1)
2927 break;
2928 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2929 vec_oprnd0);
2931 if (k == 1)
2932 vargs.safe_push (vec_oprnd0);
2933 else
2935 vec_oprnd0 = build_constructor (atype, ctor_elts);
2936 new_stmt
2937 = gimple_build_assign (make_ssa_name (atype, NULL),
2938 vec_oprnd0);
2939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2940 vargs.safe_push (gimple_assign_lhs (new_stmt));
2944 break;
2945 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2946 vargs.safe_push (op);
2947 break;
2948 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2949 if (j == 0)
2951 gimple_seq stmts;
2952 arginfo[i].op
2953 = force_gimple_operand (arginfo[i].op, &stmts, true,
2954 NULL_TREE);
2955 if (stmts != NULL)
2957 basic_block new_bb;
2958 edge pe = loop_preheader_edge (loop);
2959 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2960 gcc_assert (!new_bb);
2962 tree phi_res = copy_ssa_name (op, NULL);
2963 gimple new_phi = create_phi_node (phi_res, loop->header);
2964 set_vinfo_for_stmt (new_phi,
2965 new_stmt_vec_info (new_phi, loop_vinfo,
2966 NULL));
2967 add_phi_arg (new_phi, arginfo[i].op,
2968 loop_preheader_edge (loop), UNKNOWN_LOCATION);
2969 enum tree_code code
2970 = POINTER_TYPE_P (TREE_TYPE (op))
2971 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2972 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2973 ? sizetype : TREE_TYPE (op);
2974 double_int cst
2975 = double_int::from_shwi
2976 (bestn->simdclone->args[i].linear_step);
2977 cst *= double_int::from_uhwi (ncopies * nunits);
2978 tree tcst = double_int_to_tree (type, cst);
2979 tree phi_arg = copy_ssa_name (op, NULL);
2980 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
2981 phi_res, tcst);
2982 gimple_stmt_iterator si = gsi_after_labels (loop->header);
2983 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
2984 set_vinfo_for_stmt (new_stmt,
2985 new_stmt_vec_info (new_stmt, loop_vinfo,
2986 NULL));
2987 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
2988 UNKNOWN_LOCATION);
2989 arginfo[i].op = phi_res;
2990 vargs.safe_push (phi_res);
2992 else
2994 enum tree_code code
2995 = POINTER_TYPE_P (TREE_TYPE (op))
2996 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2997 tree type = POINTER_TYPE_P (TREE_TYPE (op))
2998 ? sizetype : TREE_TYPE (op);
2999 double_int cst
3000 = double_int::from_shwi
3001 (bestn->simdclone->args[i].linear_step);
3002 cst *= double_int::from_uhwi (j * nunits);
3003 tree tcst = double_int_to_tree (type, cst);
3004 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3005 new_stmt
3006 = gimple_build_assign_with_ops (code, new_temp,
3007 arginfo[i].op, tcst);
3008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3009 vargs.safe_push (new_temp);
3011 break;
3012 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3013 default:
3014 gcc_unreachable ();
3018 new_stmt = gimple_build_call_vec (fndecl, vargs);
3019 if (vec_dest)
3021 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3022 if (ratype)
3023 new_temp = create_tmp_var (ratype, NULL);
3024 else if (TYPE_VECTOR_SUBPARTS (vectype)
3025 == TYPE_VECTOR_SUBPARTS (rtype))
3026 new_temp = make_ssa_name (vec_dest, new_stmt);
3027 else
3028 new_temp = make_ssa_name (rtype, new_stmt);
3029 gimple_call_set_lhs (new_stmt, new_temp);
3031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3033 if (vec_dest)
3035 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3037 unsigned int k, l;
3038 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3039 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3040 gcc_assert ((k & (k - 1)) == 0);
3041 for (l = 0; l < k; l++)
3043 tree t;
3044 if (ratype)
3046 t = build_fold_addr_expr (new_temp);
3047 t = build2 (MEM_REF, vectype, t,
3048 build_int_cst (TREE_TYPE (t),
3049 l * prec / BITS_PER_UNIT));
3051 else
3052 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3053 size_int (prec), bitsize_int (l * prec));
3054 new_stmt
3055 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3056 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3057 if (j == 0 && l == 0)
3058 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3059 else
3060 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3062 prev_stmt_info = vinfo_for_stmt (new_stmt);
3065 if (ratype)
3067 tree clobber = build_constructor (ratype, NULL);
3068 TREE_THIS_VOLATILE (clobber) = 1;
3069 new_stmt = gimple_build_assign (new_temp, clobber);
3070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3072 continue;
3074 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3076 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3077 / TYPE_VECTOR_SUBPARTS (rtype));
3078 gcc_assert ((k & (k - 1)) == 0);
3079 if ((j & (k - 1)) == 0)
3080 vec_alloc (ret_ctor_elts, k);
3081 if (ratype)
3083 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3084 for (m = 0; m < o; m++)
3086 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3087 size_int (m), NULL_TREE, NULL_TREE);
3088 new_stmt
3089 = gimple_build_assign (make_ssa_name (rtype, NULL),
3090 tem);
3091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3092 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3093 gimple_assign_lhs (new_stmt));
3095 tree clobber = build_constructor (ratype, NULL);
3096 TREE_THIS_VOLATILE (clobber) = 1;
3097 new_stmt = gimple_build_assign (new_temp, clobber);
3098 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3100 else
3101 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3102 if ((j & (k - 1)) != k - 1)
3103 continue;
3104 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3105 new_stmt
3106 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3107 vec_oprnd0);
3108 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3110 if ((unsigned) j == k - 1)
3111 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3112 else
3113 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3115 prev_stmt_info = vinfo_for_stmt (new_stmt);
3116 continue;
3118 else if (ratype)
3120 tree t = build_fold_addr_expr (new_temp);
3121 t = build2 (MEM_REF, vectype, t,
3122 build_int_cst (TREE_TYPE (t), 0));
3123 new_stmt
3124 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3125 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3126 tree clobber = build_constructor (ratype, NULL);
3127 TREE_THIS_VOLATILE (clobber) = 1;
3128 vect_finish_stmt_generation (stmt,
3129 gimple_build_assign (new_temp,
3130 clobber), gsi);
3134 if (j == 0)
3135 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3136 else
3137 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3139 prev_stmt_info = vinfo_for_stmt (new_stmt);
3142 vargs.release ();
3144 /* The call in STMT might prevent it from being removed in dce.
3145 We however cannot remove it here, due to the way the ssa name
3146 it defines is mapped to the new definition. So just replace
3147 rhs of the statement with something harmless. */
3149 if (slp_node)
3150 return true;
3152 if (scalar_dest)
3154 type = TREE_TYPE (scalar_dest);
3155 if (is_pattern_stmt_p (stmt_info))
3156 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3157 else
3158 lhs = gimple_call_lhs (stmt);
3159 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3161 else
3162 new_stmt = gimple_build_nop ();
3163 set_vinfo_for_stmt (new_stmt, stmt_info);
3164 set_vinfo_for_stmt (stmt, NULL);
3165 STMT_VINFO_STMT (stmt_info) = new_stmt;
3166 gsi_replace (gsi, new_stmt, false);
3167 unlink_stmt_vdef (stmt);
3169 return true;
3173 /* Function vect_gen_widened_results_half
3175 Create a vector stmt whose code, number of arguments, and result
3176 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3177 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3178 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3179 needs to be created (DECL is a function-decl of a target-builtin).
3180 STMT is the original scalar stmt that we are vectorizing. */
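/* For a widening multiplication, for instance,
   supportable_widening_operation typically yields
   CODE1 == VEC_WIDEN_MULT_LO_EXPR and CODE2 == VEC_WIDEN_MULT_HI_EXPR;
   each call of this function then emits one of the two stmts that together
   produce all widened elements.  (Illustrative only; the actual codes or
   target builtins come from the caller.)  */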
3182 static gimple
3183 vect_gen_widened_results_half (enum tree_code code,
3184 tree decl,
3185 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3186 tree vec_dest, gimple_stmt_iterator *gsi,
3187 gimple stmt)
3189 gimple new_stmt;
3190 tree new_temp;
3192 /* Generate half of the widened result: */
3193 if (code == CALL_EXPR)
3195 /* Target specific support */
3196 if (op_type == binary_op)
3197 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3198 else
3199 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3200 new_temp = make_ssa_name (vec_dest, new_stmt);
3201 gimple_call_set_lhs (new_stmt, new_temp);
3203 else
3205 /* Generic support */
3206 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3207 if (op_type != binary_op)
3208 vec_oprnd1 = NULL;
3209 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3210 vec_oprnd1);
3211 new_temp = make_ssa_name (vec_dest, new_stmt);
3212 gimple_assign_set_lhs (new_stmt, new_temp);
3214 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3216 return new_stmt;
3220 /* Get vectorized definitions for loop-based vectorization. For the first
3221 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3222 scalar operand), and for the rest we get a copy with
3223 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3224 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3225 The vectors are collected into VEC_OPRNDS. */
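/* E.g. for a two-step narrowing (MULTI_STEP_CVT == 1) this collects four
   vector defs into VEC_OPRNDS -- the def of OPRND plus three successive
   stmt copies -- which the demotion code then combines pairwise.
   (Illustrative count only.)  */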
3227 static void
3228 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3229 vec<tree> *vec_oprnds, int multi_step_cvt)
3231 tree vec_oprnd;
3233 /* Get first vector operand. */
3234 /* All the vector operands except the very first one (which is the scalar OPRND)
3235 are stmt copies. */
3236 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3237 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3238 else
3239 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3241 vec_oprnds->quick_push (vec_oprnd);
3243 /* Get second vector operand. */
3244 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3245 vec_oprnds->quick_push (vec_oprnd);
3247 *oprnd = vec_oprnd;
3249 /* For conversion in multiple steps, continue to get operands
3250 recursively. */
3251 if (multi_step_cvt)
3252 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3256 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3257 For multi-step conversions store the resulting vectors and call the function
3258 recursively. */
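/* For example, narrowing int to short with 128-bit vectors combines each
   pair of V4SI operands into one V8HI result using VEC_PACK_TRUNC_EXPR.
   (Illustrative types.)  */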
3260 static void
3261 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3262 int multi_step_cvt, gimple stmt,
3263 vec<tree> vec_dsts,
3264 gimple_stmt_iterator *gsi,
3265 slp_tree slp_node, enum tree_code code,
3266 stmt_vec_info *prev_stmt_info)
3268 unsigned int i;
3269 tree vop0, vop1, new_tmp, vec_dest;
3270 gimple new_stmt;
3271 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3273 vec_dest = vec_dsts.pop ();
3275 for (i = 0; i < vec_oprnds->length (); i += 2)
3277 /* Create demotion operation. */
3278 vop0 = (*vec_oprnds)[i];
3279 vop1 = (*vec_oprnds)[i + 1];
3280 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3281 new_tmp = make_ssa_name (vec_dest, new_stmt);
3282 gimple_assign_set_lhs (new_stmt, new_tmp);
3283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3285 if (multi_step_cvt)
3286 /* Store the resulting vector for next recursive call. */
3287 (*vec_oprnds)[i/2] = new_tmp;
3288 else
3290 /* This is the last step of the conversion sequence. Store the
3291 vectors in SLP_NODE or in vector info of the scalar statement
3292 (or in STMT_VINFO_RELATED_STMT chain). */
3293 if (slp_node)
3294 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3295 else
3297 if (!*prev_stmt_info)
3298 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3299 else
3300 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3302 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3307 /* For multi-step demotion operations we first generate demotion operations
3308 from the source type to the intermediate types, and then combine the
3309 results (stored in VEC_OPRNDS) in demotion operation to the destination
3310 type. */
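  /* E.g. an int -> char conversion is performed as int -> short -> char:
     the first level packs pairs of int vectors into short vectors, and
     this recursive call packs pairs of those into char vectors.
     (Illustrative type chain.)  */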
3311 if (multi_step_cvt)
3313 /* At each level of recursion we have half of the operands we had at the
3314 previous level. */
3315 vec_oprnds->truncate ((i+1)/2);
3316 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3317 stmt, vec_dsts, gsi, slp_node,
3318 VEC_PACK_TRUNC_EXPR,
3319 prev_stmt_info);
3322 vec_dsts.quick_push (vec_dest);
3326 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3327 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3328 the resulting vectors and call the function recursively. */
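/* E.g. widening a V8HI operand yields two V4SI results (the LO and HI
   halves), so VEC_OPRNDS0 doubles in length on each invocation; for
   multi-step promotions the caller invokes this function once per
   intermediate type.  (Illustrative types.)  */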
3330 static void
3331 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3332 vec<tree> *vec_oprnds1,
3333 gimple stmt, tree vec_dest,
3334 gimple_stmt_iterator *gsi,
3335 enum tree_code code1,
3336 enum tree_code code2, tree decl1,
3337 tree decl2, int op_type)
3339 int i;
3340 tree vop0, vop1, new_tmp1, new_tmp2;
3341 gimple new_stmt1, new_stmt2;
3342 vec<tree> vec_tmp = vNULL;
3344 vec_tmp.create (vec_oprnds0->length () * 2);
3345 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3347 if (op_type == binary_op)
3348 vop1 = (*vec_oprnds1)[i];
3349 else
3350 vop1 = NULL_TREE;
3352 /* Generate the two halves of promotion operation. */
3353 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3354 op_type, vec_dest, gsi, stmt);
3355 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3356 op_type, vec_dest, gsi, stmt);
3357 if (is_gimple_call (new_stmt1))
3359 new_tmp1 = gimple_call_lhs (new_stmt1);
3360 new_tmp2 = gimple_call_lhs (new_stmt2);
3362 else
3364 new_tmp1 = gimple_assign_lhs (new_stmt1);
3365 new_tmp2 = gimple_assign_lhs (new_stmt2);
3368 /* Store the results for the next step. */
3369 vec_tmp.quick_push (new_tmp1);
3370 vec_tmp.quick_push (new_tmp2);
3373 vec_oprnds0->release ();
3374 *vec_oprnds0 = vec_tmp;
3378 /* Check if STMT performs a conversion operation that can be vectorized.
3379 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3380 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3381 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
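/* Typical statements handled here: "(double) i_3" (FLOAT_EXPR),
   "(int) d_5" (FIX_TRUNC_EXPR), widening or narrowing integer conversions
   (NOP_EXPR), and the WIDEN_MULT_EXPR / WIDEN_LSHIFT_EXPR forms produced
   by pattern recognition.  Which of the NONE / WIDEN / NARROW paths is
   taken depends on the relative number of vector elements in the input and
   output vector types.  (Hypothetical SSA names, for illustration.)  */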
3383 static bool
3384 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3385 gimple *vec_stmt, slp_tree slp_node)
3387 tree vec_dest;
3388 tree scalar_dest;
3389 tree op0, op1 = NULL_TREE;
3390 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3391 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3392 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3393 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3394 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3395 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3396 tree new_temp;
3397 tree def;
3398 gimple def_stmt;
3399 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3400 gimple new_stmt = NULL;
3401 stmt_vec_info prev_stmt_info;
3402 int nunits_in;
3403 int nunits_out;
3404 tree vectype_out, vectype_in;
3405 int ncopies, i, j;
3406 tree lhs_type, rhs_type;
3407 enum { NARROW, NONE, WIDEN } modifier;
3408 vec<tree> vec_oprnds0 = vNULL;
3409 vec<tree> vec_oprnds1 = vNULL;
3410 tree vop0;
3411 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3412 int multi_step_cvt = 0;
3413 vec<tree> vec_dsts = vNULL;
3414 vec<tree> interm_types = vNULL;
3415 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3416 int op_type;
3417 enum machine_mode rhs_mode;
3418 unsigned short fltsz;
3420 /* Is STMT a vectorizable conversion? */
3422 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3423 return false;
3425 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3426 return false;
3428 if (!is_gimple_assign (stmt))
3429 return false;
3431 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3432 return false;
3434 code = gimple_assign_rhs_code (stmt);
3435 if (!CONVERT_EXPR_CODE_P (code)
3436 && code != FIX_TRUNC_EXPR
3437 && code != FLOAT_EXPR
3438 && code != WIDEN_MULT_EXPR
3439 && code != WIDEN_LSHIFT_EXPR)
3440 return false;
3442 op_type = TREE_CODE_LENGTH (code);
3444 /* Check types of lhs and rhs. */
3445 scalar_dest = gimple_assign_lhs (stmt);
3446 lhs_type = TREE_TYPE (scalar_dest);
3447 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3449 op0 = gimple_assign_rhs1 (stmt);
3450 rhs_type = TREE_TYPE (op0);
3452 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3453 && !((INTEGRAL_TYPE_P (lhs_type)
3454 && INTEGRAL_TYPE_P (rhs_type))
3455 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3456 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3457 return false;
3459 if ((INTEGRAL_TYPE_P (lhs_type)
3460 && (TYPE_PRECISION (lhs_type)
3461 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3462 || (INTEGRAL_TYPE_P (rhs_type)
3463 && (TYPE_PRECISION (rhs_type)
3464 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3466 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3468 "type conversion to/from bit-precision unsupported."
3469 "\n");
3470 return false;
3473 /* Check the operands of the operation. */
3474 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3475 &def_stmt, &def, &dt[0], &vectype_in))
3477 if (dump_enabled_p ())
3478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3479 "use not simple.\n");
3480 return false;
3482 if (op_type == binary_op)
3484 bool ok;
3486 op1 = gimple_assign_rhs2 (stmt);
3487 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3488 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3489 OP1. */
3490 if (CONSTANT_CLASS_P (op0))
3491 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3492 &def_stmt, &def, &dt[1], &vectype_in);
3493 else
3494 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3495 &def, &dt[1]);
3497 if (!ok)
3499 if (dump_enabled_p ())
3500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3501 "use not simple.\n");
3502 return false;
3506 /* If op0 is an external or constant defs use a vector type of
3507 the same size as the output vector type. */
3508 if (!vectype_in)
3509 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3510 if (vec_stmt)
3511 gcc_assert (vectype_in);
3512 if (!vectype_in)
3514 if (dump_enabled_p ())
3516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3517 "no vectype for scalar type ");
3518 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3519 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3522 return false;
3525 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3526 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3527 if (nunits_in < nunits_out)
3528 modifier = NARROW;
3529 else if (nunits_out == nunits_in)
3530 modifier = NONE;
3531 else
3532 modifier = WIDEN;
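  /* E.g. with 128-bit vectors, float -> double (V4SF -> V2DF) has
     nunits_in == 4 > nunits_out == 2 and is the WIDEN case; the reverse
     double -> float conversion is NARROW; int <-> float of equal width is
     NONE.  (Assumes equally sized vector types.)  */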
3534 /* Multiple types in SLP are handled by creating the appropriate number of
3535 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3536 case of SLP. */
3537 if (slp_node || PURE_SLP_STMT (stmt_info))
3538 ncopies = 1;
3539 else if (modifier == NARROW)
3540 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3541 else
3542 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3544 /* Sanity check: make sure that at least one copy of the vectorized stmt
3545 needs to be generated. */
3546 gcc_assert (ncopies >= 1);
3548 /* Supportable by target? */
3549 switch (modifier)
3551 case NONE:
3552 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3553 return false;
3554 if (supportable_convert_operation (code, vectype_out, vectype_in,
3555 &decl1, &code1))
3556 break;
3557 /* FALLTHRU */
3558 unsupported:
3559 if (dump_enabled_p ())
3560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3561 "conversion not supported by target.\n");
3562 return false;
3564 case WIDEN:
3565 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3566 &code1, &code2, &multi_step_cvt,
3567 &interm_types))
3569 /* Binary widening operation can only be supported directly by the
3570 architecture. */
3571 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3572 break;
3575 if (code != FLOAT_EXPR
3576 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3577 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3578 goto unsupported;
3580 rhs_mode = TYPE_MODE (rhs_type);
3581 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3582 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3583 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3584 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3586 cvt_type
3587 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3588 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3589 if (cvt_type == NULL_TREE)
3590 goto unsupported;
3592 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3594 if (!supportable_convert_operation (code, vectype_out,
3595 cvt_type, &decl1, &codecvt1))
3596 goto unsupported;
3598 else if (!supportable_widening_operation (code, stmt, vectype_out,
3599 cvt_type, &codecvt1,
3600 &codecvt2, &multi_step_cvt,
3601 &interm_types))
3602 continue;
3603 else
3604 gcc_assert (multi_step_cvt == 0);
3606 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3607 vectype_in, &code1, &code2,
3608 &multi_step_cvt, &interm_types))
3609 break;
3612 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3613 goto unsupported;
3615 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3616 codecvt2 = ERROR_MARK;
3617 else
3619 multi_step_cvt++;
3620 interm_types.safe_push (cvt_type);
3621 cvt_type = NULL_TREE;
3623 break;
3625 case NARROW:
3626 gcc_assert (op_type == unary_op);
3627 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3628 &code1, &multi_step_cvt,
3629 &interm_types))
3630 break;
3632 if (code != FIX_TRUNC_EXPR
3633 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3634 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3635 goto unsupported;
3637 rhs_mode = TYPE_MODE (rhs_type);
3638 cvt_type
3639 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3640 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3641 if (cvt_type == NULL_TREE)
3642 goto unsupported;
3643 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3644 &decl1, &codecvt1))
3645 goto unsupported;
3646 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3647 &code1, &multi_step_cvt,
3648 &interm_types))
3649 break;
3650 goto unsupported;
3652 default:
3653 gcc_unreachable ();
3656 if (!vec_stmt) /* transformation not required. */
3658 if (dump_enabled_p ())
3659 dump_printf_loc (MSG_NOTE, vect_location,
3660 "=== vectorizable_conversion ===\n");
3661 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3663 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3664 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3666 else if (modifier == NARROW)
3668 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3669 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3671 else
3673 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3674 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3676 interm_types.release ();
3677 return true;
3680 /** Transform. **/
3681 if (dump_enabled_p ())
3682 dump_printf_loc (MSG_NOTE, vect_location,
3683 "transform conversion. ncopies = %d.\n", ncopies);
3685 if (op_type == binary_op)
3687 if (CONSTANT_CLASS_P (op0))
3688 op0 = fold_convert (TREE_TYPE (op1), op0);
3689 else if (CONSTANT_CLASS_P (op1))
3690 op1 = fold_convert (TREE_TYPE (op0), op1);
3693 /* In case of multi-step conversion, we first generate conversion operations
3694 to the intermediate types, and then from those types to the final one.
3695 We create vector destinations for the intermediate type (TYPES) received
3696 from supportable_*_operation, and store them in the correct order
3697 for future use in vect_create_vectorized_*_stmts (). */
3698 vec_dsts.create (multi_step_cvt + 1);
3699 vec_dest = vect_create_destination_var (scalar_dest,
3700 (cvt_type && modifier == WIDEN)
3701 ? cvt_type : vectype_out);
3702 vec_dsts.quick_push (vec_dest);
3704 if (multi_step_cvt)
3706 for (i = interm_types.length () - 1;
3707 interm_types.iterate (i, &intermediate_type); i--)
3709 vec_dest = vect_create_destination_var (scalar_dest,
3710 intermediate_type);
3711 vec_dsts.quick_push (vec_dest);
3715 if (cvt_type)
3716 vec_dest = vect_create_destination_var (scalar_dest,
3717 modifier == WIDEN
3718 ? vectype_out : cvt_type);
3720 if (!slp_node)
3722 if (modifier == WIDEN)
3724 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3725 if (op_type == binary_op)
3726 vec_oprnds1.create (1);
3728 else if (modifier == NARROW)
3729 vec_oprnds0.create (
3730 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3732 else if (code == WIDEN_LSHIFT_EXPR)
3733 vec_oprnds1.create (slp_node->vec_stmts_size);
3735 last_oprnd = op0;
3736 prev_stmt_info = NULL;
3737 switch (modifier)
3739 case NONE:
3740 for (j = 0; j < ncopies; j++)
3742 if (j == 0)
3743 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3744 -1);
3745 else
3746 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3748 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3750 /* Arguments are ready, create the new vector stmt. */
3751 if (code1 == CALL_EXPR)
3753 new_stmt = gimple_build_call (decl1, 1, vop0);
3754 new_temp = make_ssa_name (vec_dest, new_stmt);
3755 gimple_call_set_lhs (new_stmt, new_temp);
3757 else
3759 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3760 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3761 vop0, NULL);
3762 new_temp = make_ssa_name (vec_dest, new_stmt);
3763 gimple_assign_set_lhs (new_stmt, new_temp);
3766 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3767 if (slp_node)
3768 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3771 if (j == 0)
3772 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3773 else
3774 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3775 prev_stmt_info = vinfo_for_stmt (new_stmt);
3777 break;
3779 case WIDEN:
3780 /* In case the vectorization factor (VF) is bigger than the number
3781 of elements that we can fit in a vectype (nunits), we have to
3782 generate more than one vector stmt, i.e. we need to "unroll"
3783 the vector stmt by a factor VF/nunits. */
3784 for (j = 0; j < ncopies; j++)
3786 /* Handle uses. */
3787 if (j == 0)
3789 if (slp_node)
3791 if (code == WIDEN_LSHIFT_EXPR)
3793 unsigned int k;
3795 vec_oprnd1 = op1;
3796 /* Store vec_oprnd1 for every vector stmt to be created
3797 for SLP_NODE. We check during the analysis that all
3798 the shift arguments are the same. */
3799 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3800 vec_oprnds1.quick_push (vec_oprnd1);
3802 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3803 slp_node, -1);
3805 else
3806 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3807 &vec_oprnds1, slp_node, -1);
3809 else
3811 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3812 vec_oprnds0.quick_push (vec_oprnd0);
3813 if (op_type == binary_op)
3815 if (code == WIDEN_LSHIFT_EXPR)
3816 vec_oprnd1 = op1;
3817 else
3818 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3819 NULL);
3820 vec_oprnds1.quick_push (vec_oprnd1);
3824 else
3826 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3827 vec_oprnds0.truncate (0);
3828 vec_oprnds0.quick_push (vec_oprnd0);
3829 if (op_type == binary_op)
3831 if (code == WIDEN_LSHIFT_EXPR)
3832 vec_oprnd1 = op1;
3833 else
3834 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3835 vec_oprnd1);
3836 vec_oprnds1.truncate (0);
3837 vec_oprnds1.quick_push (vec_oprnd1);
3841 /* Arguments are ready. Create the new vector stmts. */
3842 for (i = multi_step_cvt; i >= 0; i--)
3844 tree this_dest = vec_dsts[i];
3845 enum tree_code c1 = code1, c2 = code2;
3846 if (i == 0 && codecvt2 != ERROR_MARK)
3848 c1 = codecvt1;
3849 c2 = codecvt2;
3851 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3852 &vec_oprnds1,
3853 stmt, this_dest, gsi,
3854 c1, c2, decl1, decl2,
3855 op_type);
3858 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3860 if (cvt_type)
3862 if (codecvt1 == CALL_EXPR)
3864 new_stmt = gimple_build_call (decl1, 1, vop0);
3865 new_temp = make_ssa_name (vec_dest, new_stmt);
3866 gimple_call_set_lhs (new_stmt, new_temp);
3868 else
3870 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3871 new_temp = make_ssa_name (vec_dest, NULL);
3872 new_stmt = gimple_build_assign_with_ops (codecvt1,
3873 new_temp,
3874 vop0, NULL);
3877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3879 else
3880 new_stmt = SSA_NAME_DEF_STMT (vop0);
3882 if (slp_node)
3883 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3884 else
3886 if (!prev_stmt_info)
3887 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3888 else
3889 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3890 prev_stmt_info = vinfo_for_stmt (new_stmt);
3895 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3896 break;
3898 case NARROW:
3899 /* In case the vectorization factor (VF) is bigger than the number
3900 of elements that we can fit in a vectype (nunits), we have to
3901 generate more than one vector stmt - i.e., we need to "unroll"
3902 the vector stmt by a factor VF/nunits. */
3903 for (j = 0; j < ncopies; j++)
3905 /* Handle uses. */
3906 if (slp_node)
3907 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3908 slp_node, -1);
3909 else
3911 vec_oprnds0.truncate (0);
3912 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3913 vect_pow2 (multi_step_cvt) - 1);
3916 /* Arguments are ready. Create the new vector stmts. */
3917 if (cvt_type)
3918 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3920 if (codecvt1 == CALL_EXPR)
3922 new_stmt = gimple_build_call (decl1, 1, vop0);
3923 new_temp = make_ssa_name (vec_dest, new_stmt);
3924 gimple_call_set_lhs (new_stmt, new_temp);
3926 else
3928 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3929 new_temp = make_ssa_name (vec_dest, NULL);
3930 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3931 vop0, NULL);
3934 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3935 vec_oprnds0[i] = new_temp;
3938 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3939 stmt, vec_dsts, gsi,
3940 slp_node, code1,
3941 &prev_stmt_info);
3944 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3945 break;
3948 vec_oprnds0.release ();
3949 vec_oprnds1.release ();
3950 vec_dsts.release ();
3951 interm_types.release ();
3953 return true;
3957 /* Function vectorizable_assignment.
3959 Check if STMT performs an assignment (copy) that can be vectorized.
3960 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3961 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3962 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3964 static bool
3965 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3966 gimple *vec_stmt, slp_tree slp_node)
3968 tree vec_dest;
3969 tree scalar_dest;
3970 tree op;
3971 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3972 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3973 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3974 tree new_temp;
3975 tree def;
3976 gimple def_stmt;
3977 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3978 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3979 int ncopies;
3980 int i, j;
3981 vec<tree> vec_oprnds = vNULL;
3982 tree vop;
3983 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3984 gimple new_stmt = NULL;
3985 stmt_vec_info prev_stmt_info = NULL;
3986 enum tree_code code;
3987 tree vectype_in;
3989 /* Multiple types in SLP are handled by creating the appropriate number of
3990 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3991 case of SLP. */
3992 if (slp_node || PURE_SLP_STMT (stmt_info))
3993 ncopies = 1;
3994 else
3995 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3997 gcc_assert (ncopies >= 1);
3999 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4000 return false;
4002 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4003 return false;
4005 /* Is vectorizable assignment? */
4006 if (!is_gimple_assign (stmt))
4007 return false;
4009 scalar_dest = gimple_assign_lhs (stmt);
4010 if (TREE_CODE (scalar_dest) != SSA_NAME)
4011 return false;
4013 code = gimple_assign_rhs_code (stmt);
4014 if (gimple_assign_single_p (stmt)
4015 || code == PAREN_EXPR
4016 || CONVERT_EXPR_CODE_P (code))
4017 op = gimple_assign_rhs1 (stmt);
4018 else
4019 return false;
4021 if (code == VIEW_CONVERT_EXPR)
4022 op = TREE_OPERAND (op, 0);
4024 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4025 &def_stmt, &def, &dt[0], &vectype_in))
4027 if (dump_enabled_p ())
4028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4029 "use not simple.\n");
4030 return false;
4033 /* We can handle NOP_EXPR conversions that do not change the number
4034 of elements or the vector size. */
4035 if ((CONVERT_EXPR_CODE_P (code)
4036 || code == VIEW_CONVERT_EXPR)
4037 && (!vectype_in
4038 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4039 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4040 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4041 return false;
4043 /* We do not handle bit-precision changes. */
4044 if ((CONVERT_EXPR_CODE_P (code)
4045 || code == VIEW_CONVERT_EXPR)
4046 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4047 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4048 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4049 || ((TYPE_PRECISION (TREE_TYPE (op))
4050 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4051 /* But a conversion that does not change the bit-pattern is ok. */
4052 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4053 > TYPE_PRECISION (TREE_TYPE (op)))
4054 && TYPE_UNSIGNED (TREE_TYPE (op))))
4056 if (dump_enabled_p ())
4057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4058 "type conversion to/from bit-precision "
4059 "unsupported.\n");
4060 return false;
4063 if (!vec_stmt) /* transformation not required. */
4065 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4066 if (dump_enabled_p ())
4067 dump_printf_loc (MSG_NOTE, vect_location,
4068 "=== vectorizable_assignment ===\n");
4069 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4070 return true;
4073 /** Transform. **/
4074 if (dump_enabled_p ())
4075 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4077 /* Handle def. */
4078 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4080 /* Handle use. */
4081 for (j = 0; j < ncopies; j++)
4083 /* Handle uses. */
4084 if (j == 0)
4085 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4086 else
4087 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4089 /* Arguments are ready.  Create the new vector stmt. */
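/* For instance (illustrative only): for a same-width conversion such as
   int_b = (int) unsigned_a, the generated vector stmt has the form
     vect_b = VIEW_CONVERT_EXPR<vector int>(vect_a);
   while a plain copy simply becomes a vector copy.  */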
4090 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4092 if (CONVERT_EXPR_CODE_P (code)
4093 || code == VIEW_CONVERT_EXPR)
4094 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4095 new_stmt = gimple_build_assign (vec_dest, vop);
4096 new_temp = make_ssa_name (vec_dest, new_stmt);
4097 gimple_assign_set_lhs (new_stmt, new_temp);
4098 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4099 if (slp_node)
4100 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4103 if (slp_node)
4104 continue;
4106 if (j == 0)
4107 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4108 else
4109 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4111 prev_stmt_info = vinfo_for_stmt (new_stmt);
4114 vec_oprnds.release ();
4115 return true;
4119 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4120 either as shift by a scalar or by a vector. */
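/* I.e. it returns true if the target supports either a vector shifted by a
   scalar amount (every element shifted by the same count, e.g. x << 3) or a
   vector shifted by a vector of per-element counts (e.g. x << y).  */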
4122 bool
4123 vect_supportable_shift (enum tree_code code, tree scalar_type)
4126 enum machine_mode vec_mode;
4127 optab optab;
4128 int icode;
4129 tree vectype;
4131 vectype = get_vectype_for_scalar_type (scalar_type);
4132 if (!vectype)
4133 return false;
4135 optab = optab_for_tree_code (code, vectype, optab_scalar);
4136 if (!optab
4137 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4139 optab = optab_for_tree_code (code, vectype, optab_vector);
4140 if (!optab
4141 || (optab_handler (optab, TYPE_MODE (vectype))
4142 == CODE_FOR_nothing))
4143 return false;
4146 vec_mode = TYPE_MODE (vectype);
4147 icode = (int) optab_handler (optab, vec_mode);
4148 if (icode == CODE_FOR_nothing)
4149 return false;
4151 return true;
4155 /* Function vectorizable_shift.
4157 Check if STMT performs a shift operation that can be vectorized.
4158 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4159 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4160 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4162 static bool
4163 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4164 gimple *vec_stmt, slp_tree slp_node)
4166 tree vec_dest;
4167 tree scalar_dest;
4168 tree op0, op1 = NULL;
4169 tree vec_oprnd1 = NULL_TREE;
4170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4171 tree vectype;
4172 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4173 enum tree_code code;
4174 enum machine_mode vec_mode;
4175 tree new_temp;
4176 optab optab;
4177 int icode;
4178 enum machine_mode optab_op2_mode;
4179 tree def;
4180 gimple def_stmt;
4181 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4182 gimple new_stmt = NULL;
4183 stmt_vec_info prev_stmt_info;
4184 int nunits_in;
4185 int nunits_out;
4186 tree vectype_out;
4187 tree op1_vectype;
4188 int ncopies;
4189 int j, i;
4190 vec<tree> vec_oprnds0 = vNULL;
4191 vec<tree> vec_oprnds1 = vNULL;
4192 tree vop0, vop1;
4193 unsigned int k;
4194 bool scalar_shift_arg = true;
4195 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4196 int vf;
4198 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4199 return false;
4201 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4202 return false;
4204 /* Is STMT a vectorizable binary/unary operation? */
4205 if (!is_gimple_assign (stmt))
4206 return false;
4208 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4209 return false;
4211 code = gimple_assign_rhs_code (stmt);
4213 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4214 || code == RROTATE_EXPR))
4215 return false;
4217 scalar_dest = gimple_assign_lhs (stmt);
4218 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4219 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4220 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4222 if (dump_enabled_p ())
4223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4224 "bit-precision shifts not supported.\n");
4225 return false;
4228 op0 = gimple_assign_rhs1 (stmt);
4229 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4230 &def_stmt, &def, &dt[0], &vectype))
4232 if (dump_enabled_p ())
4233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4234 "use not simple.\n");
4235 return false;
4237 /* If op0 is an external or constant def, use a vector type with
4238 the same size as the output vector type. */
4239 if (!vectype)
4240 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4241 if (vec_stmt)
4242 gcc_assert (vectype);
4243 if (!vectype)
4245 if (dump_enabled_p ())
4246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4247 "no vectype for scalar type\n");
4248 return false;
4251 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4252 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4253 if (nunits_out != nunits_in)
4254 return false;
4256 op1 = gimple_assign_rhs2 (stmt);
4257 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4258 &def, &dt[1], &op1_vectype))
4260 if (dump_enabled_p ())
4261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4262 "use not simple.\n");
4263 return false;
4266 if (loop_vinfo)
4267 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4268 else
4269 vf = 1;
4271 /* Multiple types in SLP are handled by creating the appropriate number of
4272 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4273 case of SLP. */
4274 if (slp_node || PURE_SLP_STMT (stmt_info))
4275 ncopies = 1;
4276 else
4277 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4279 gcc_assert (ncopies >= 1);
4281 /* Determine whether the shift amount is a vector or a scalar. If the
4282 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4284 if (dt[1] == vect_internal_def && !slp_node)
4285 scalar_shift_arg = false;
4286 else if (dt[1] == vect_constant_def
4287 || dt[1] == vect_external_def
4288 || dt[1] == vect_internal_def)
4290 /* In SLP, we need to check whether the shift count is the same in
4291 all the stmts; in loops, if it is a constant or invariant, it is
4292 always a scalar shift. */
4293 if (slp_node)
4295 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4296 gimple slpstmt;
4298 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4299 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4300 scalar_shift_arg = false;
4303 else
4305 if (dump_enabled_p ())
4306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4307 "operand mode requires invariant argument.\n");
4308 return false;
4311 /* Vector shifted by vector. */
4312 if (!scalar_shift_arg)
4314 optab = optab_for_tree_code (code, vectype, optab_vector);
4315 if (dump_enabled_p ())
4316 dump_printf_loc (MSG_NOTE, vect_location,
4317 "vector/vector shift/rotate found.\n");
4319 if (!op1_vectype)
4320 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4321 if (op1_vectype == NULL_TREE
4322 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4324 if (dump_enabled_p ())
4325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4326 "unusable type for last operand in"
4327 " vector/vector shift/rotate.\n");
4328 return false;
4331 /* See if the machine has a vector shifted by scalar insn and, if not,
4332 then see if it has a vector shifted by vector insn. */
4333 else
4335 optab = optab_for_tree_code (code, vectype, optab_scalar);
4336 if (optab
4337 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4339 if (dump_enabled_p ())
4340 dump_printf_loc (MSG_NOTE, vect_location,
4341 "vector/scalar shift/rotate found.\n");
4343 else
4345 optab = optab_for_tree_code (code, vectype, optab_vector);
4346 if (optab
4347 && (optab_handler (optab, TYPE_MODE (vectype))
4348 != CODE_FOR_nothing))
4350 scalar_shift_arg = false;
4352 if (dump_enabled_p ())
4353 dump_printf_loc (MSG_NOTE, vect_location,
4354 "vector/vector shift/rotate found.\n");
4356 /* Unlike the other binary operators, shifts/rotates take an rhs
4357 that is an int rather than the same type as the lhs, so make
4358 sure the scalar is of the right type if we are dealing with
4359 vectors of long long/long/short/char. */
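/* For instance, a vector of long long shifted by an 'int' count: the count
   is converted to long long here so that its type matches the vector
   element type (illustrative example).  */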
4360 if (dt[1] == vect_constant_def)
4361 op1 = fold_convert (TREE_TYPE (vectype), op1);
4362 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4363 TREE_TYPE (op1)))
4365 if (slp_node
4366 && TYPE_MODE (TREE_TYPE (vectype))
4367 != TYPE_MODE (TREE_TYPE (op1)))
4369 if (dump_enabled_p ())
4370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4371 "unusable type for last operand in"
4372 " vector/vector shift/rotate.\n");
4373 return false;
4375 if (vec_stmt && !slp_node)
4377 op1 = fold_convert (TREE_TYPE (vectype), op1);
4378 op1 = vect_init_vector (stmt, op1,
4379 TREE_TYPE (vectype), NULL);
4386 /* Supportable by target? */
4387 if (!optab)
4389 if (dump_enabled_p ())
4390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4391 "no optab.\n");
4392 return false;
4394 vec_mode = TYPE_MODE (vectype);
4395 icode = (int) optab_handler (optab, vec_mode);
4396 if (icode == CODE_FOR_nothing)
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4400 "op not supported by target.\n");
4401 /* Check only during analysis. */
4402 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4403 || (vf < vect_min_worthwhile_factor (code)
4404 && !vec_stmt))
4405 return false;
4406 if (dump_enabled_p ())
4407 dump_printf_loc (MSG_NOTE, vect_location,
4408 "proceeding using word mode.\n");
4411 /* Worthwhile without SIMD support? Check only during analysis. */
4412 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4413 && vf < vect_min_worthwhile_factor (code)
4414 && !vec_stmt)
4416 if (dump_enabled_p ())
4417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4418 "not worthwhile without SIMD support.\n");
4419 return false;
4422 if (!vec_stmt) /* transformation not required. */
4424 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4425 if (dump_enabled_p ())
4426 dump_printf_loc (MSG_NOTE, vect_location,
4427 "=== vectorizable_shift ===\n");
4428 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4429 return true;
4432 /** Transform. **/
4434 if (dump_enabled_p ())
4435 dump_printf_loc (MSG_NOTE, vect_location,
4436 "transform binary/unary operation.\n");
4438 /* Handle def. */
4439 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4441 prev_stmt_info = NULL;
4442 for (j = 0; j < ncopies; j++)
4444 /* Handle uses. */
4445 if (j == 0)
4447 if (scalar_shift_arg)
4449 /* Vector shl and shr insn patterns can be defined with scalar
4450 operand 2 (shift operand). In this case, use constant or loop
4451 invariant op1 directly, without extending it to vector mode
4452 first. */
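/* E.g. for x << 5 on a target whose shift pattern takes the count in a
   scalar mode, the constant 5 is used as-is below instead of being
   broadcast into a vector (illustrative example).  */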
4453 optab_op2_mode = insn_data[icode].operand[2].mode;
4454 if (!VECTOR_MODE_P (optab_op2_mode))
4456 if (dump_enabled_p ())
4457 dump_printf_loc (MSG_NOTE, vect_location,
4458 "operand 1 using scalar mode.\n");
4459 vec_oprnd1 = op1;
4460 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4461 vec_oprnds1.quick_push (vec_oprnd1);
4462 if (slp_node)
4464 /* Store vec_oprnd1 for every vector stmt to be created
4465 for SLP_NODE. We check during the analysis that all
4466 the shift arguments are the same.
4467 TODO: Allow different constants for different vector
4468 stmts generated for an SLP instance. */
4469 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4470 vec_oprnds1.quick_push (vec_oprnd1);
4475 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4476 (a special case for certain kinds of vector shifts); otherwise,
4477 operand 1 should be of a vector type (the usual case). */
4478 if (vec_oprnd1)
4479 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4480 slp_node, -1);
4481 else
4482 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4483 slp_node, -1);
4485 else
4486 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4488 /* Arguments are ready. Create the new vector stmt. */
4489 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4491 vop1 = vec_oprnds1[i];
4492 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4493 new_temp = make_ssa_name (vec_dest, new_stmt);
4494 gimple_assign_set_lhs (new_stmt, new_temp);
4495 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4496 if (slp_node)
4497 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4500 if (slp_node)
4501 continue;
4503 if (j == 0)
4504 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4505 else
4506 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4507 prev_stmt_info = vinfo_for_stmt (new_stmt);
4510 vec_oprnds0.release ();
4511 vec_oprnds1.release ();
4513 return true;
4517 /* Function vectorizable_operation.
4519 Check if STMT performs a binary, unary or ternary operation that can
4520 be vectorized.
4521 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4522 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4523 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4525 static bool
4526 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4527 gimple *vec_stmt, slp_tree slp_node)
4529 tree vec_dest;
4530 tree scalar_dest;
4531 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4533 tree vectype;
4534 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4535 enum tree_code code;
4536 enum machine_mode vec_mode;
4537 tree new_temp;
4538 int op_type;
4539 optab optab;
4540 int icode;
4541 tree def;
4542 gimple def_stmt;
4543 enum vect_def_type dt[3]
4544 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4545 gimple new_stmt = NULL;
4546 stmt_vec_info prev_stmt_info;
4547 int nunits_in;
4548 int nunits_out;
4549 tree vectype_out;
4550 int ncopies;
4551 int j, i;
4552 vec<tree> vec_oprnds0 = vNULL;
4553 vec<tree> vec_oprnds1 = vNULL;
4554 vec<tree> vec_oprnds2 = vNULL;
4555 tree vop0, vop1, vop2;
4556 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4557 int vf;
4559 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4560 return false;
4562 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4563 return false;
4565 /* Is STMT a vectorizable binary/unary operation? */
4566 if (!is_gimple_assign (stmt))
4567 return false;
4569 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4570 return false;
4572 code = gimple_assign_rhs_code (stmt);
4574 /* For pointer addition, we should use the normal plus for
4575 the vector addition. */
4576 if (code == POINTER_PLUS_EXPR)
4577 code = PLUS_EXPR;
4579 /* Support only unary, binary or ternary operations. */
4580 op_type = TREE_CODE_LENGTH (code);
4581 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4583 if (dump_enabled_p ())
4584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4585 "num. args = %d (not unary/binary/ternary op).\n",
4586 op_type);
4587 return false;
4590 scalar_dest = gimple_assign_lhs (stmt);
4591 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4593 /* Most operations cannot handle bit-precision types without extra
4594 truncations. */
4595 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4596 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4597 /* The exceptions are bitwise binary operations. */
4598 && code != BIT_IOR_EXPR
4599 && code != BIT_XOR_EXPR
4600 && code != BIT_AND_EXPR)
4602 if (dump_enabled_p ())
4603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4604 "bit-precision arithmetic not supported.\n");
4605 return false;
4608 op0 = gimple_assign_rhs1 (stmt);
4609 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4610 &def_stmt, &def, &dt[0], &vectype))
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4614 "use not simple.\n");
4615 return false;
4617 /* If op0 is an external or constant def, use a vector type with
4618 the same size as the output vector type. */
4619 if (!vectype)
4620 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4621 if (vec_stmt)
4622 gcc_assert (vectype);
4623 if (!vectype)
4625 if (dump_enabled_p ())
4627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4628 "no vectype for scalar type ");
4629 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4630 TREE_TYPE (op0));
4631 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4634 return false;
4637 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4638 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4639 if (nunits_out != nunits_in)
4640 return false;
4642 if (op_type == binary_op || op_type == ternary_op)
4644 op1 = gimple_assign_rhs2 (stmt);
4645 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4646 &def, &dt[1]))
4648 if (dump_enabled_p ())
4649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4650 "use not simple.\n");
4651 return false;
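/* A ternary operation (e.g. FMA_EXPR, the fused a * b + c) has a third
   operand whose def we need to check as well.  */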
4654 if (op_type == ternary_op)
4656 op2 = gimple_assign_rhs3 (stmt);
4657 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4658 &def, &dt[2]))
4660 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4662 "use not simple.\n");
4663 return false;
4667 if (loop_vinfo)
4668 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4669 else
4670 vf = 1;
4672 /* Multiple types in SLP are handled by creating the appropriate number of
4673 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4674 case of SLP. */
4675 if (slp_node || PURE_SLP_STMT (stmt_info))
4676 ncopies = 1;
4677 else
4678 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4680 gcc_assert (ncopies >= 1);
4682 /* Shifts are handled in vectorizable_shift (). */
4683 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4684 || code == RROTATE_EXPR)
4685 return false;
4687 /* Supportable by target? */
4689 vec_mode = TYPE_MODE (vectype);
4690 if (code == MULT_HIGHPART_EXPR)
4692 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4693 icode = LAST_INSN_CODE;
4694 else
4695 icode = CODE_FOR_nothing;
4697 else
4699 optab = optab_for_tree_code (code, vectype, optab_default);
4700 if (!optab)
4702 if (dump_enabled_p ())
4703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4704 "no optab.\n");
4705 return false;
4707 icode = (int) optab_handler (optab, vec_mode);
4710 if (icode == CODE_FOR_nothing)
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4714 "op not supported by target.\n");
4715 /* Check only during analysis. */
4716 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4717 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4718 return false;
4719 if (dump_enabled_p ())
4720 dump_printf_loc (MSG_NOTE, vect_location,
4721 "proceeding using word mode.\n");
4724 /* Worthwhile without SIMD support? Check only during analysis. */
4725 if (!VECTOR_MODE_P (vec_mode)
4726 && !vec_stmt
4727 && vf < vect_min_worthwhile_factor (code))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4731 "not worthwhile without SIMD support.\n");
4732 return false;
4735 if (!vec_stmt) /* transformation not required. */
4737 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4738 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_NOTE, vect_location,
4740 "=== vectorizable_operation ===\n");
4741 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4742 return true;
4745 /** Transform. **/
4747 if (dump_enabled_p ())
4748 dump_printf_loc (MSG_NOTE, vect_location,
4749 "transform binary/unary operation.\n");
4751 /* Handle def. */
4752 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4754 /* In case the vectorization factor (VF) is bigger than the number
4755 of elements that we can fit in a vectype (nunits), we have to generate
4756 more than one vector stmt - i.e., we need to "unroll" the
4757 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4758 from one copy of the vector stmt to the next, in the field
4759 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4760 stages to find the correct vector defs to be used when vectorizing
4761 stmts that use the defs of the current stmt. The example below
4762 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4763 we need to create 4 vectorized stmts):
4765 before vectorization:
4766 RELATED_STMT VEC_STMT
4767 S1: x = memref - -
4768 S2: z = x + 1 - -
4770 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4771 there):
4772 RELATED_STMT VEC_STMT
4773 VS1_0: vx0 = memref0 VS1_1 -
4774 VS1_1: vx1 = memref1 VS1_2 -
4775 VS1_2: vx2 = memref2 VS1_3 -
4776 VS1_3: vx3 = memref3 - -
4777 S1: x = load - VS1_0
4778 S2: z = x + 1 - -
4780 step2: vectorize stmt S2 (done here):
4781 To vectorize stmt S2 we first need to find the relevant vector
4782 def for the first operand 'x'. This is, as usual, obtained from
4783 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4784 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4785 relevant vector def 'vx0'. Having found 'vx0' we can generate
4786 the vector stmt VS2_0, and as usual, record it in the
4787 STMT_VINFO_VEC_STMT of stmt S2.
4788 When creating the second copy (VS2_1), we obtain the relevant vector
4789 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4790 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4791 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4792 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4793 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4794 chain of stmts and pointers:
4795 RELATED_STMT VEC_STMT
4796 VS1_0: vx0 = memref0 VS1_1 -
4797 VS1_1: vx1 = memref1 VS1_2 -
4798 VS1_2: vx2 = memref2 VS1_3 -
4799 VS1_3: vx3 = memref3 - -
4800 S1: x = load - VS1_0
4801 VS2_0: vz0 = vx0 + v1 VS2_1 -
4802 VS2_1: vz1 = vx1 + v1 VS2_2 -
4803 VS2_2: vz2 = vx2 + v1 VS2_3 -
4804 VS2_3: vz3 = vx3 + v1 - -
4805 S2: z = x + 1 - VS2_0 */
4807 prev_stmt_info = NULL;
4808 for (j = 0; j < ncopies; j++)
4810 /* Handle uses. */
4811 if (j == 0)
4813 if (op_type == binary_op || op_type == ternary_op)
4814 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4815 slp_node, -1);
4816 else
4817 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4818 slp_node, -1);
4819 if (op_type == ternary_op)
4821 vec_oprnds2.create (1);
4822 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4823 stmt,
4824 NULL));
4827 else
4829 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4830 if (op_type == ternary_op)
4832 tree vec_oprnd = vec_oprnds2.pop ();
4833 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4834 vec_oprnd));
4838 /* Arguments are ready. Create the new vector stmt. */
4839 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4841 vop1 = ((op_type == binary_op || op_type == ternary_op)
4842 ? vec_oprnds1[i] : NULL_TREE);
4843 vop2 = ((op_type == ternary_op)
4844 ? vec_oprnds2[i] : NULL_TREE);
4845 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4846 vop0, vop1, vop2);
4847 new_temp = make_ssa_name (vec_dest, new_stmt);
4848 gimple_assign_set_lhs (new_stmt, new_temp);
4849 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4850 if (slp_node)
4851 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4854 if (slp_node)
4855 continue;
4857 if (j == 0)
4858 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4859 else
4860 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4861 prev_stmt_info = vinfo_for_stmt (new_stmt);
4864 vec_oprnds0.release ();
4865 vec_oprnds1.release ();
4866 vec_oprnds2.release ();
4868 return true;
4871 /* A helper function to ensure data reference DR's base alignment
4872 for STMT_INFO. */
4874 static void
4875 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4877 if (!dr->aux)
4878 return;
4880 if (((dataref_aux *)dr->aux)->base_misaligned)
4882 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4883 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4885 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4886 DECL_USER_ALIGN (base_decl) = 1;
4887 ((dataref_aux *)dr->aux)->base_misaligned = false;
4892 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4893 reversal of the vector elements. If that is impossible to do,
4894 returns NULL. */
4896 static tree
4897 perm_mask_for_reverse (tree vectype)
4899 int i, nunits;
4900 unsigned char *sel;
4902 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4903 sel = XALLOCAVEC (unsigned char, nunits);
4905 for (i = 0; i < nunits; ++i)
4906 sel[i] = nunits - 1 - i;
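/* E.g. with nunits == 4 this builds the selector { 3, 2, 1, 0 }, so the
   resulting mask makes VEC_PERM_EXPR <v, v, mask> produce the elements of
   V in reverse order (illustrative example).  */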
4908 return vect_gen_perm_mask (vectype, sel);
4911 /* Function vectorizable_store.
4913 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4914 can be vectorized.
4915 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4916 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4917 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4919 static bool
4920 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4921 slp_tree slp_node)
4923 tree scalar_dest;
4924 tree data_ref;
4925 tree op;
4926 tree vec_oprnd = NULL_TREE;
4927 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4928 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4929 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4930 tree elem_type;
4931 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4932 struct loop *loop = NULL;
4933 enum machine_mode vec_mode;
4934 tree dummy;
4935 enum dr_alignment_support alignment_support_scheme;
4936 tree def;
4937 gimple def_stmt;
4938 enum vect_def_type dt;
4939 stmt_vec_info prev_stmt_info = NULL;
4940 tree dataref_ptr = NULL_TREE;
4941 tree dataref_offset = NULL_TREE;
4942 gimple ptr_incr = NULL;
4943 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4944 int ncopies;
4945 int j;
4946 gimple next_stmt, first_stmt = NULL;
4947 bool grouped_store = false;
4948 bool store_lanes_p = false;
4949 unsigned int group_size, i;
4950 vec<tree> dr_chain = vNULL;
4951 vec<tree> oprnds = vNULL;
4952 vec<tree> result_chain = vNULL;
4953 bool inv_p;
4954 bool negative = false;
4955 tree offset = NULL_TREE;
4956 vec<tree> vec_oprnds = vNULL;
4957 bool slp = (slp_node != NULL);
4958 unsigned int vec_num;
4959 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4960 tree aggr_type;
4962 if (loop_vinfo)
4963 loop = LOOP_VINFO_LOOP (loop_vinfo);
4965 /* Multiple types in SLP are handled by creating the appropriate number of
4966 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4967 case of SLP. */
4968 if (slp || PURE_SLP_STMT (stmt_info))
4969 ncopies = 1;
4970 else
4971 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4973 gcc_assert (ncopies >= 1);
4975 /* FORNOW. This restriction should be relaxed. */
4976 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4978 if (dump_enabled_p ())
4979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4980 "multiple types in nested loop.\n");
4981 return false;
4984 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4985 return false;
4987 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4988 return false;
4990 /* Is vectorizable store? */
4992 if (!is_gimple_assign (stmt))
4993 return false;
4995 scalar_dest = gimple_assign_lhs (stmt);
4996 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
4997 && is_pattern_stmt_p (stmt_info))
4998 scalar_dest = TREE_OPERAND (scalar_dest, 0);
4999 if (TREE_CODE (scalar_dest) != ARRAY_REF
5000 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5001 && TREE_CODE (scalar_dest) != INDIRECT_REF
5002 && TREE_CODE (scalar_dest) != COMPONENT_REF
5003 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5004 && TREE_CODE (scalar_dest) != REALPART_EXPR
5005 && TREE_CODE (scalar_dest) != MEM_REF)
5006 return false;
5008 gcc_assert (gimple_assign_single_p (stmt));
5009 op = gimple_assign_rhs1 (stmt);
5010 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5011 &def, &dt))
5013 if (dump_enabled_p ())
5014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5015 "use not simple.\n");
5016 return false;
5019 elem_type = TREE_TYPE (vectype);
5020 vec_mode = TYPE_MODE (vectype);
5022 /* FORNOW. In some cases we can vectorize even if the data-type is not
5023 supported (e.g. array initialization with 0). */
5024 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5025 return false;
5027 if (!STMT_VINFO_DATA_REF (stmt_info))
5028 return false;
5030 negative =
5031 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5032 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5033 size_zero_node) < 0;
5034 if (negative && ncopies > 1)
5036 if (dump_enabled_p ())
5037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5038 "multiple types with negative step.\n");
5039 return false;
5042 if (negative)
5044 gcc_assert (!grouped_store);
5045 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5046 if (alignment_support_scheme != dr_aligned
5047 && alignment_support_scheme != dr_unaligned_supported)
5049 if (dump_enabled_p ())
5050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5051 "negative step but alignment required.\n");
5052 return false;
5054 if (dt != vect_constant_def
5055 && dt != vect_external_def
5056 && !perm_mask_for_reverse (vectype))
5058 if (dump_enabled_p ())
5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5060 "negative step and reversing not supported.\n");
5061 return false;
5065 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5067 grouped_store = true;
5068 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5069 if (!slp && !PURE_SLP_STMT (stmt_info))
5071 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5072 if (vect_store_lanes_supported (vectype, group_size))
5073 store_lanes_p = true;
5074 else if (!vect_grouped_store_supported (vectype, group_size))
5075 return false;
5078 if (first_stmt == stmt)
5080 /* STMT is the leader of the group. Check the operands of all the
5081 stmts of the group. */
5082 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5083 while (next_stmt)
5085 gcc_assert (gimple_assign_single_p (next_stmt));
5086 op = gimple_assign_rhs1 (next_stmt);
5087 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5088 &def_stmt, &def, &dt))
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5092 "use not simple.\n");
5093 return false;
5095 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5100 if (!vec_stmt) /* transformation not required. */
5102 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5103 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5104 NULL, NULL, NULL);
5105 return true;
5108 /** Transform. **/
5110 ensure_base_align (stmt_info, dr);
5112 if (grouped_store)
5114 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5115 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5117 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5119 /* FORNOW */
5120 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5122 /* We vectorize all the stmts of the interleaving group when we
5123 reach the last stmt in the group. */
5124 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5125 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5126 && !slp)
5128 *vec_stmt = NULL;
5129 return true;
5132 if (slp)
5134 grouped_store = false;
5135 /* VEC_NUM is the number of vect stmts to be created for this
5136 group. */
5137 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5138 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5139 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5140 op = gimple_assign_rhs1 (first_stmt);
5142 else
5143 /* VEC_NUM is the number of vect stmts to be created for this
5144 group. */
5145 vec_num = group_size;
5147 else
5149 first_stmt = stmt;
5150 first_dr = dr;
5151 group_size = vec_num = 1;
5154 if (dump_enabled_p ())
5155 dump_printf_loc (MSG_NOTE, vect_location,
5156 "transform store. ncopies = %d\n", ncopies);
5158 dr_chain.create (group_size);
5159 oprnds.create (group_size);
5161 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5162 gcc_assert (alignment_support_scheme);
5163 /* Targets with store-lane instructions must not require explicit
5164 realignment. */
5165 gcc_assert (!store_lanes_p
5166 || alignment_support_scheme == dr_aligned
5167 || alignment_support_scheme == dr_unaligned_supported);
5169 if (negative)
5170 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5172 if (store_lanes_p)
5173 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5174 else
5175 aggr_type = vectype;
5177 /* In case the vectorization factor (VF) is bigger than the number
5178 of elements that we can fit in a vectype (nunits), we have to generate
5179 more than one vector stmt - i.e., we need to "unroll" the
5180 vector stmt by a factor VF/nunits. For more details see documentation in
5181 vect_get_vec_def_for_copy_stmt. */
5183 /* In case of interleaving (non-unit grouped access):
5185 S1: &base + 2 = x2
5186 S2: &base = x0
5187 S3: &base + 1 = x1
5188 S4: &base + 3 = x3
5190 We create vectorized stores starting from base address (the access of the
5191 first stmt in the chain (S2 in the above example), when the last store stmt
5192 of the chain (S4) is reached:
5194 VS1: &base = vx2
5195 VS2: &base + vec_size*1 = vx0
5196 VS3: &base + vec_size*2 = vx1
5197 VS4: &base + vec_size*3 = vx3
5199 Then permutation statements are generated:
5201 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5202 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5205 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5206 (the order of the data-refs in the output of vect_permute_store_chain
5207 corresponds to the order of scalar stmts in the interleaving chain - see
5208 the documentation of vect_permute_store_chain()).
5210 In case of both multiple types and interleaving, above vector stores and
5211 permutation stmts are created for every copy. The result vector stmts are
5212 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5213 STMT_VINFO_RELATED_STMT for the next copies.
5216 prev_stmt_info = NULL;
5217 for (j = 0; j < ncopies; j++)
5219 gimple new_stmt;
5221 if (j == 0)
5223 if (slp)
5225 /* Get vectorized arguments for SLP_NODE. */
5226 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5227 NULL, slp_node, -1);
5229 vec_oprnd = vec_oprnds[0];
5231 else
5233 /* For interleaved stores we collect vectorized defs for all the
5234 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5235 used as an input to vect_permute_store_chain(), and OPRNDS as
5236 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5238 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5239 OPRNDS are of size 1. */
5240 next_stmt = first_stmt;
5241 for (i = 0; i < group_size; i++)
5243 /* Since gaps are not supported for interleaved stores,
5244 GROUP_SIZE is the exact number of stmts in the chain.
5245 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5246 there is no interleaving, GROUP_SIZE is 1, and only one
5247 iteration of the loop will be executed. */
5248 gcc_assert (next_stmt
5249 && gimple_assign_single_p (next_stmt));
5250 op = gimple_assign_rhs1 (next_stmt);
5252 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5253 NULL);
5254 dr_chain.quick_push (vec_oprnd);
5255 oprnds.quick_push (vec_oprnd);
5256 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5260 /* We should have caught mismatched types earlier. */
5261 gcc_assert (useless_type_conversion_p (vectype,
5262 TREE_TYPE (vec_oprnd)));
5263 bool simd_lane_access_p
5264 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5265 if (simd_lane_access_p
5266 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5267 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5268 && integer_zerop (DR_OFFSET (first_dr))
5269 && integer_zerop (DR_INIT (first_dr))
5270 && alias_sets_conflict_p (get_alias_set (aggr_type),
5271 get_alias_set (DR_REF (first_dr))))
5273 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5274 dataref_offset = build_int_cst (reference_alias_ptr_type
5275 (DR_REF (first_dr)), 0);
5276 inv_p = false;
5278 else
5279 dataref_ptr
5280 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5281 simd_lane_access_p ? loop : NULL,
5282 offset, &dummy, gsi, &ptr_incr,
5283 simd_lane_access_p, &inv_p);
5284 gcc_assert (bb_vinfo || !inv_p);
5286 else
5288 /* For interleaved stores we created vectorized defs for all the
5289 defs stored in OPRNDS in the previous iteration (previous copy).
5290 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5291 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5292 next copy.
5293 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5294 OPRNDS are of size 1. */
5295 for (i = 0; i < group_size; i++)
5297 op = oprnds[i];
5298 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5299 &def, &dt);
5300 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5301 dr_chain[i] = vec_oprnd;
5302 oprnds[i] = vec_oprnd;
5304 if (dataref_offset)
5305 dataref_offset
5306 = int_const_binop (PLUS_EXPR, dataref_offset,
5307 TYPE_SIZE_UNIT (aggr_type));
5308 else
5309 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5310 TYPE_SIZE_UNIT (aggr_type));
5313 if (store_lanes_p)
5315 tree vec_array;
5317 /* Combine all the vectors into an array. */
5318 vec_array = create_vector_array (vectype, vec_num);
5319 for (i = 0; i < vec_num; i++)
5321 vec_oprnd = dr_chain[i];
5322 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5325 /* Emit:
5326 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5327 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5328 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5329 gimple_call_set_lhs (new_stmt, data_ref);
5330 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5332 else
5334 new_stmt = NULL;
5335 if (grouped_store)
5337 if (j == 0)
5338 result_chain.create (group_size);
5339 /* Permute. */
5340 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5341 &result_chain);
5344 next_stmt = first_stmt;
5345 for (i = 0; i < vec_num; i++)
5347 unsigned align, misalign;
5349 if (i > 0)
5350 /* Bump the vector pointer. */
5351 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5352 stmt, NULL_TREE);
5354 if (slp)
5355 vec_oprnd = vec_oprnds[i];
5356 else if (grouped_store)
5357 /* For grouped stores vectorized defs are interleaved in
5358 vect_permute_store_chain(). */
5359 vec_oprnd = result_chain[i];
5361 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5362 dataref_offset
5363 ? dataref_offset
5364 : build_int_cst (reference_alias_ptr_type
5365 (DR_REF (first_dr)), 0));
5366 align = TYPE_ALIGN_UNIT (vectype);
5367 if (aligned_access_p (first_dr))
5368 misalign = 0;
5369 else if (DR_MISALIGNMENT (first_dr) == -1)
5371 TREE_TYPE (data_ref)
5372 = build_aligned_type (TREE_TYPE (data_ref),
5373 TYPE_ALIGN (elem_type));
5374 align = TYPE_ALIGN_UNIT (elem_type);
5375 misalign = 0;
5377 else
5379 TREE_TYPE (data_ref)
5380 = build_aligned_type (TREE_TYPE (data_ref),
5381 TYPE_ALIGN (elem_type));
5382 misalign = DR_MISALIGNMENT (first_dr);
5384 if (dataref_offset == NULL_TREE)
5385 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5386 misalign);
5388 if (negative
5389 && dt != vect_constant_def
5390 && dt != vect_external_def)
5392 tree perm_mask = perm_mask_for_reverse (vectype);
5393 tree perm_dest
5394 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5395 vectype);
5396 tree new_temp = make_ssa_name (perm_dest, NULL);
5398 /* Generate the permute statement. */
5399 gimple perm_stmt
5400 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5401 vec_oprnd, vec_oprnd,
5402 perm_mask);
5403 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5405 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5406 vec_oprnd = new_temp;
5409 /* Arguments are ready. Create the new vector stmt. */
5410 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5413 if (slp)
5414 continue;
5416 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5417 if (!next_stmt)
5418 break;
5421 if (!slp)
5423 if (j == 0)
5424 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5425 else
5426 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5427 prev_stmt_info = vinfo_for_stmt (new_stmt);
5431 dr_chain.release ();
5432 oprnds.release ();
5433 result_chain.release ();
5434 vec_oprnds.release ();
5436 return true;
5439 /* Given a vector type VECTYPE and a permutation SEL, returns
5440 the VECTOR_CST mask that implements the permutation of the
5441 vector elements. If that is impossible to do, returns NULL. */
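/* For instance, for a four-element vector and SEL = {3, 2, 1, 0} the result
   is the VECTOR_CST {3, 2, 1, 0} of the matching integer vector type,
   suitable as the third operand of a VEC_PERM_EXPR (assuming the target's
   can_vec_perm_p accepts the selector).  */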
5443 tree
5444 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5446 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5447 int i, nunits;
5449 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5451 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5452 return NULL;
5454 mask_elt_type = lang_hooks.types.type_for_mode
5455 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5456 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5458 mask_elts = XALLOCAVEC (tree, nunits);
5459 for (i = nunits - 1; i >= 0; i--)
5460 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5461 mask_vec = build_vector (mask_type, mask_elts);
5463 return mask_vec;
5466 /* Given vector variables X and Y that were generated for the scalar
5467 STMT, generate instructions to permute the vector elements of X and Y
5468 using permutation mask MASK_VEC, insert them at *GSI and return the
5469 permuted vector variable. */
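/* The emitted statement has the form (illustrative):
     perm_dest_N = VEC_PERM_EXPR <x, y, mask_vec>;  */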
5471 static tree
5472 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5473 gimple_stmt_iterator *gsi)
5475 tree vectype = TREE_TYPE (x);
5476 tree perm_dest, data_ref;
5477 gimple perm_stmt;
5479 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5480 data_ref = make_ssa_name (perm_dest, NULL);
5482 /* Generate the permute statement. */
5483 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5484 x, y, mask_vec);
5485 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5487 return data_ref;
5490 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5491 inserting them on the loop's preheader edge. Returns true if we
5492 were successful in doing so (and thus STMT can then be moved),
5493 otherwise returns false. */
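/* For instance, if STMT is a loop-invariant load whose address p_1 is
   computed inside LOOP purely from loop-invariant operands, the stmt
   defining p_1 is moved to the preheader so that STMT itself can then be
   hoisted (used when hoisting invariant loads).  */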
5495 static bool
5496 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5498 ssa_op_iter i;
5499 tree op;
5500 bool any = false;
5502 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5504 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5505 if (!gimple_nop_p (def_stmt)
5506 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5508 /* Make sure we don't need to recurse. While we could do
5509 so in simple cases when there are more complex use webs
5510 we don't have an easy way to preserve stmt order to fulfil
5511 dependencies within them. */
5512 tree op2;
5513 ssa_op_iter i2;
5514 if (gimple_code (def_stmt) == GIMPLE_PHI)
5515 return false;
5516 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5518 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5519 if (!gimple_nop_p (def_stmt2)
5520 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5521 return false;
5523 any = true;
5527 if (!any)
5528 return true;
5530 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5532 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5533 if (!gimple_nop_p (def_stmt)
5534 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5536 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5537 gsi_remove (&gsi, false);
5538 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5542 return true;
5545 /* vectorizable_load.
5547 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5548 can be vectorized.
5549 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5550 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5551 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5553 static bool
5554 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5555 slp_tree slp_node, slp_instance slp_node_instance)
5557 tree scalar_dest;
5558 tree vec_dest = NULL;
5559 tree data_ref = NULL;
5560 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5561 stmt_vec_info prev_stmt_info;
5562 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5563 struct loop *loop = NULL;
5564 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5565 bool nested_in_vect_loop = false;
5566 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5567 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5568 tree elem_type;
5569 tree new_temp;
5570 enum machine_mode mode;
5571 gimple new_stmt = NULL;
5572 tree dummy;
5573 enum dr_alignment_support alignment_support_scheme;
5574 tree dataref_ptr = NULL_TREE;
5575 tree dataref_offset = NULL_TREE;
5576 gimple ptr_incr = NULL;
5577 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5578 int ncopies;
5579 int i, j, group_size, group_gap;
5580 tree msq = NULL_TREE, lsq;
5581 tree offset = NULL_TREE;
5582 tree realignment_token = NULL_TREE;
5583 gimple phi = NULL;
5584 vec<tree> dr_chain = vNULL;
5585 bool grouped_load = false;
5586 bool load_lanes_p = false;
5587 gimple first_stmt;
5588 bool inv_p;
5589 bool negative = false;
5590 bool compute_in_loop = false;
5591 struct loop *at_loop;
5592 int vec_num;
5593 bool slp = (slp_node != NULL);
5594 bool slp_perm = false;
5595 enum tree_code code;
5596 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5597 int vf;
5598 tree aggr_type;
5599 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5600 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5601 int gather_scale = 1;
5602 enum vect_def_type gather_dt = vect_unknown_def_type;
5604 if (loop_vinfo)
5606 loop = LOOP_VINFO_LOOP (loop_vinfo);
5607 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5608 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5610 else
5611 vf = 1;
5613 /* Multiple types in SLP are handled by creating the appropriate number of
5614 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5615 case of SLP. */
5616 if (slp || PURE_SLP_STMT (stmt_info))
5617 ncopies = 1;
5618 else
5619 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5621 gcc_assert (ncopies >= 1);
5623 /* FORNOW. This restriction should be relaxed. */
5624 if (nested_in_vect_loop && ncopies > 1)
5626 if (dump_enabled_p ())
5627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5628 "multiple types in nested loop.\n");
5629 return false;
5632 /* Invalidate assumptions made by dependence analysis when vectorization
5633 on the unrolled body effectively re-orders stmts. */
5634 if (ncopies > 1
5635 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5636 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5637 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5639 if (dump_enabled_p ())
5640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5641 "cannot perform implicit CSE when unrolling "
5642 "with negative dependence distance\n");
5643 return false;
5646 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5647 return false;
5649 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5650 return false;
5652 /* Is vectorizable load? */
5653 if (!is_gimple_assign (stmt))
5654 return false;
5656 scalar_dest = gimple_assign_lhs (stmt);
5657 if (TREE_CODE (scalar_dest) != SSA_NAME)
5658 return false;
5660 code = gimple_assign_rhs_code (stmt);
5661 if (code != ARRAY_REF
5662 && code != BIT_FIELD_REF
5663 && code != INDIRECT_REF
5664 && code != COMPONENT_REF
5665 && code != IMAGPART_EXPR
5666 && code != REALPART_EXPR
5667 && code != MEM_REF
5668 && TREE_CODE_CLASS (code) != tcc_declaration)
5669 return false;
5671 if (!STMT_VINFO_DATA_REF (stmt_info))
5672 return false;
5674 elem_type = TREE_TYPE (vectype);
5675 mode = TYPE_MODE (vectype);
5677 /* FORNOW. In some cases we can vectorize even if the data-type is not
5678 supported (e.g. data copies). */
5679 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5681 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5683 "Aligned load, but unsupported type.\n");
5684 return false;
5687 /* Check if the load is a part of an interleaving chain. */
5688 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5690 grouped_load = true;
5691 /* FORNOW */
5692 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5694 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5695 if (!slp && !PURE_SLP_STMT (stmt_info))
5697 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5698 if (vect_load_lanes_supported (vectype, group_size))
5699 load_lanes_p = true;
5700 else if (!vect_grouped_load_supported (vectype, group_size))
5701 return false;
5704 /* Invalidate assumptions made by dependence analysis when vectorization
5705 on the unrolled body effectively re-orders stmts. */
5706 if (!PURE_SLP_STMT (stmt_info)
5707 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5708 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5709 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5711 if (dump_enabled_p ())
5712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5713 "cannot perform implicit CSE when performing "
5714 "group loads with negative dependence distance\n");
5715 return false;
5720 if (STMT_VINFO_GATHER_P (stmt_info))
5722 gimple def_stmt;
5723 tree def;
5724 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5725 &gather_off, &gather_scale);
5726 gcc_assert (gather_decl);
5727 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5728 &def_stmt, &def, &gather_dt,
5729 &gather_off_vectype))
5731 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733 "gather index use not simple.\n");
5734 return false;
5737 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5738 ;
5739 else
5741 negative = tree_int_cst_compare (nested_in_vect_loop
5742 ? STMT_VINFO_DR_STEP (stmt_info)
5743 : DR_STEP (dr),
5744 size_zero_node) < 0;
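/* A negative step arises, for example, from a loop that walks the array
   backwards:
     for (i = n - 1; i >= 0; --i)
       ... = a[i];
   in which case DR_STEP is minus the element size.  */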
5745 if (negative && ncopies > 1)
5747 if (dump_enabled_p ())
5748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5749 "multiple types with negative step.\n");
5750 return false;
5753 if (negative)
5755 if (grouped_load)
5757 if (dump_enabled_p ())
5758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5759 "negative step for group load not supported"
5760 "\n");
5761 return false;
5763 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5764 if (alignment_support_scheme != dr_aligned
5765 && alignment_support_scheme != dr_unaligned_supported)
5767 if (dump_enabled_p ())
5768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5769 "negative step but alignment required.\n");
5770 return false;
5772 if (!perm_mask_for_reverse (vectype))
5774 if (dump_enabled_p ())
5775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5776 "negative step and reversing not supported."
5777 "\n");
5778 return false;
5783 if (!vec_stmt) /* transformation not required. */
5785 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5786 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5787 return true;
5790 if (dump_enabled_p ())
5791 dump_printf_loc (MSG_NOTE, vect_location,
5792 "transform load. ncopies = %d\n", ncopies);
5794 /** Transform. **/
5796 ensure_base_align (stmt_info, dr);
5798 if (STMT_VINFO_GATHER_P (stmt_info))
5800 tree vec_oprnd0 = NULL_TREE, op;
5801 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5802 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5803 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5804 edge pe = loop_preheader_edge (loop);
5805 gimple_seq seq;
5806 basic_block new_bb;
5807 enum { NARROW, NONE, WIDEN } modifier;
5808 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5810 if (nunits == gather_off_nunits)
5811 modifier = NONE;
5812 else if (nunits == gather_off_nunits / 2)
5814 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5815 modifier = WIDEN;
5817 for (i = 0; i < gather_off_nunits; ++i)
5818 sel[i] = i | nunits;
5820 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5821 gcc_assert (perm_mask != NULL_TREE);
5823 else if (nunits == gather_off_nunits * 2)
5825 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5826 modifier = NARROW;
5828 for (i = 0; i < nunits; ++i)
5829 sel[i] = i < gather_off_nunits
5830 ? i : i + nunits - gather_off_nunits;
5832 perm_mask = vect_gen_perm_mask (vectype, sel);
5833 gcc_assert (perm_mask != NULL_TREE);
5834 ncopies *= 2;
5836 else
5837 gcc_unreachable ();
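/* Illustration (hypothetical sizes): for WIDEN, e.g. nunits == 4 and
   gather_off_nunits == 8, sel is { 4, 5, 6, 7, 4, 5, 6, 7 }, so the
   permutation below hands the upper half of the offset vector to every
   odd-numbered copy.  For NARROW, e.g. nunits == 8 and
   gather_off_nunits == 4, sel is { 0, 1, 2, 3, 8, 9, 10, 11 }, which
   concatenates the low halves of two consecutive gather results; ncopies
   is doubled accordingly.  */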
5839 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5840 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5841 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5842 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5843 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5844 scaletype = TREE_VALUE (arglist);
5845 gcc_checking_assert (types_compatible_p (srctype, rettype));
5847 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5849 ptr = fold_convert (ptrtype, gather_base);
5850 if (!is_gimple_min_invariant (ptr))
5852 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5853 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5854 gcc_assert (!new_bb);
5857 /* Currently we support only unconditional gather loads,
5858 so mask should be all ones. */
5859 if (TREE_CODE (masktype) == INTEGER_TYPE)
5860 mask = build_int_cst (masktype, -1);
5861 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5863 mask = build_int_cst (TREE_TYPE (masktype), -1);
5864 mask = build_vector_from_val (masktype, mask);
5865 mask = vect_init_vector (stmt, mask, masktype, NULL);
5867 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5869 REAL_VALUE_TYPE r;
5870 long tmp[6];
5871 for (j = 0; j < 6; ++j)
5872 tmp[j] = -1;
5873 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5874 mask = build_real (TREE_TYPE (masktype), r);
5875 mask = build_vector_from_val (masktype, mask);
5876 mask = vect_init_vector (stmt, mask, masktype, NULL);
5878 else
5879 gcc_unreachable ();
5881 scale = build_int_cst (scaletype, gather_scale);
5883 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5884 merge = build_int_cst (TREE_TYPE (rettype), 0);
5885 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5887 REAL_VALUE_TYPE r;
5888 long tmp[6];
5889 for (j = 0; j < 6; ++j)
5890 tmp[j] = 0;
5891 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5892 merge = build_real (TREE_TYPE (rettype), r);
5894 else
5895 gcc_unreachable ();
5896 merge = build_vector_from_val (rettype, merge);
5897 merge = vect_init_vector (stmt, merge, rettype, NULL);
5899 prev_stmt_info = NULL;
5900 for (j = 0; j < ncopies; ++j)
5902 if (modifier == WIDEN && (j & 1))
5903 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5904 perm_mask, stmt, gsi);
5905 else if (j == 0)
5906 op = vec_oprnd0
5907 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5908 else
5909 op = vec_oprnd0
5910 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5912 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5914 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5915 == TYPE_VECTOR_SUBPARTS (idxtype));
5916 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5917 var = make_ssa_name (var, NULL);
5918 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5919 new_stmt
5920 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5921 op, NULL_TREE);
5922 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5923 op = var;
5926 new_stmt
5927 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5929 if (!useless_type_conversion_p (vectype, rettype))
5931 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5932 == TYPE_VECTOR_SUBPARTS (rettype));
5933 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5934 op = make_ssa_name (var, new_stmt);
5935 gimple_call_set_lhs (new_stmt, op);
5936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5937 var = make_ssa_name (vec_dest, NULL);
5938 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5939 new_stmt
5940 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5941 NULL_TREE);
5943 else
5945 var = make_ssa_name (vec_dest, new_stmt);
5946 gimple_call_set_lhs (new_stmt, var);
5949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5951 if (modifier == NARROW)
5953 if ((j & 1) == 0)
5955 prev_res = var;
5956 continue;
5958 var = permute_vec_elements (prev_res, var,
5959 perm_mask, stmt, gsi);
5960 new_stmt = SSA_NAME_DEF_STMT (var);
5963 if (prev_stmt_info == NULL)
5964 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5965 else
5966 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5967 prev_stmt_info = vinfo_for_stmt (new_stmt);
5969 return true;
5971 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5973 gimple_stmt_iterator incr_gsi;
5974 bool insert_after;
5975 gimple incr;
5976 tree offvar;
5977 tree ivstep;
5978 tree running_off;
5979 vec<constructor_elt, va_gc> *v = NULL;
5980 gimple_seq stmts = NULL;
5981 tree stride_base, stride_step, alias_off;
5983 gcc_assert (!nested_in_vect_loop);
5985 stride_base
5986 = fold_build_pointer_plus
5987 (unshare_expr (DR_BASE_ADDRESS (dr)),
5988 size_binop (PLUS_EXPR,
5989 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5990 convert_to_ptrofftype (DR_INIT (dr))));
5991 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5993 /* For a load with loop-invariant (but other than power-of-2)
5994 stride (i.e. not a grouped access) like so:
5996 for (i = 0; i < n; i += stride)
5997 ... = array[i];
5999 we generate a new induction variable and new accesses to
6000 form a new vector (or vectors, depending on ncopies):
6002 for (j = 0; ; j += VF*stride)
6003 tmp1 = array[j];
6004 tmp2 = array[j + stride];
6006 vectemp = {tmp1, tmp2, ...}
6007 */
6009 ivstep = stride_step;
6010 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6011 build_int_cst (TREE_TYPE (ivstep), vf));
6013 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6015 create_iv (stride_base, ivstep, NULL,
6016 loop, &incr_gsi, insert_after,
6017 &offvar, NULL);
6018 incr = gsi_stmt (incr_gsi);
6019 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6021 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6022 if (stmts)
6023 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6025 prev_stmt_info = NULL;
6026 running_off = offvar;
6027 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6028 for (j = 0; j < ncopies; j++)
6030 tree vec_inv;
6032 vec_alloc (v, nunits);
6033 for (i = 0; i < nunits; i++)
6035 tree newref, newoff;
6036 gimple incr;
6037 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6038 running_off, alias_off);
6040 newref = force_gimple_operand_gsi (gsi, newref, true,
6041 NULL_TREE, true,
6042 GSI_SAME_STMT);
6043 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6044 newoff = copy_ssa_name (running_off, NULL);
6045 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6046 running_off, stride_step);
6047 vect_finish_stmt_generation (stmt, incr, gsi);
6049 running_off = newoff;
6052 vec_inv = build_constructor (vectype, v);
6053 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6054 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6056 if (j == 0)
6057 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6058 else
6059 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6060 prev_stmt_info = vinfo_for_stmt (new_stmt);
6062 return true;
6065 if (grouped_load)
6067 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6068 if (slp
6069 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6070 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6071 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6073 /* Check if the chain of loads is already vectorized. */
6074 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6075 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6076 ??? But we can only do so if there is exactly one
6077 as we have no way to get at the rest. Leave the CSE
6078 opportunity alone.
6079 ??? With the group load eventually participating
6080 in multiple different permutations (having multiple
6081 slp nodes which refer to the same group) the CSE
6082 is even wrong code. See PR56270. */
6083 && !slp)
6085 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6086 return true;
6088 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6089 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6091 /* VEC_NUM is the number of vect stmts to be created for this group. */
6092 if (slp)
6094 grouped_load = false;
6095 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6096 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6097 slp_perm = true;
6098 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6100 else
6102 vec_num = group_size;
6103 group_gap = 0;
6106 else
6108 first_stmt = stmt;
6109 first_dr = dr;
6110 group_size = vec_num = 1;
6111 group_gap = 0;
6114 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6115 gcc_assert (alignment_support_scheme);
6116 /* Targets with load-lane instructions must not require explicit
6117 realignment. */
6118 gcc_assert (!load_lanes_p
6119 || alignment_support_scheme == dr_aligned
6120 || alignment_support_scheme == dr_unaligned_supported);
6122 /* In case the vectorization factor (VF) is bigger than the number
6123 of elements that we can fit in a vectype (nunits), we have to generate
6124 more than one vector stmt - i.e - we need to "unroll" the
6125 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6126 from one copy of the vector stmt to the next, in the field
6127 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6128 stages to find the correct vector defs to be used when vectorizing
6129 stmts that use the defs of the current stmt. The example below
6130 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6131 need to create 4 vectorized stmts):
6133 before vectorization:
6134 RELATED_STMT VEC_STMT
6135 S1: x = memref - -
6136 S2: z = x + 1 - -
6138 step 1: vectorize stmt S1:
6139 We first create the vector stmt VS1_0, and, as usual, record a
6140 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6141 Next, we create the vector stmt VS1_1, and record a pointer to
6142 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6143 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6144 stmts and pointers:
6145 RELATED_STMT VEC_STMT
6146 VS1_0: vx0 = memref0 VS1_1 -
6147 VS1_1: vx1 = memref1 VS1_2 -
6148 VS1_2: vx2 = memref2 VS1_3 -
6149 VS1_3: vx3 = memref3 - -
6150 S1: x = load - VS1_0
6151 S2: z = x + 1 - -
6153 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6154 information we recorded in the RELATED_STMT field is used to vectorize
6155 stmt S2. */
6157 /* In case of interleaving (non-unit grouped access):
6159 S1: x2 = &base + 2
6160 S2: x0 = &base
6161 S3: x1 = &base + 1
6162 S4: x3 = &base + 3
6164 Vectorized loads are created in the order of memory accesses
6165 starting from the access of the first stmt of the chain:
6167 VS1: vx0 = &base
6168 VS2: vx1 = &base + vec_size*1
6169 VS3: vx3 = &base + vec_size*2
6170 VS4: vx4 = &base + vec_size*3
6172 Then permutation statements are generated:
6174 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6175 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6178 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6179 (the order of the data-refs in the output of vect_permute_load_chain
6180 corresponds to the order of scalar stmts in the interleaving chain - see
6181 the documentation of vect_permute_load_chain()).
6182 The generation of permutation stmts and recording them in
6183 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6185 In case of both multiple types and interleaving, the vector loads and
6186 permutation stmts above are created for every copy. The result vector
6187 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6188 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6190 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6191 on a target that supports unaligned accesses (dr_unaligned_supported)
6192 we generate the following code:
6193 p = initial_addr;
6194 indx = 0;
6195 loop {
6196 p = p + indx * vectype_size;
6197 vec_dest = *(p);
6198 indx = indx + 1;
6201 Otherwise, the data reference is potentially unaligned on a target that
6202 does not support unaligned accesses (dr_explicit_realign_optimized) -
6203 then generate the following code, in which the data in each iteration is
6204 obtained by two vector loads, one from the previous iteration, and one
6205 from the current iteration:
6206 p1 = initial_addr;
6207 msq_init = *(floor(p1))
6208 p2 = initial_addr + VS - 1;
6209 realignment_token = call target_builtin;
6210 indx = 0;
6211 loop {
6212 p2 = p2 + indx * vectype_size
6213 lsq = *(floor(p2))
6214 vec_dest = realign_load (msq, lsq, realignment_token)
6215 indx = indx + 1;
6216 msq = lsq;
6217 } */
6219 /* If the misalignment remains the same throughout the execution of the
6220 loop, we can create the init_addr and permutation mask at the loop
6221 preheader. Otherwise, it needs to be created inside the loop.
6222 This can only occur when vectorizing memory accesses in the inner-loop
6223 nested within an outer-loop that is being vectorized. */
6225 if (nested_in_vect_loop
6226 && (TREE_INT_CST_LOW (DR_STEP (dr))
6227 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6229 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6230 compute_in_loop = true;
6233 if ((alignment_support_scheme == dr_explicit_realign_optimized
6234 || alignment_support_scheme == dr_explicit_realign)
6235 && !compute_in_loop)
6237 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6238 alignment_support_scheme, NULL_TREE,
6239 &at_loop);
6240 if (alignment_support_scheme == dr_explicit_realign_optimized)
6242 phi = SSA_NAME_DEF_STMT (msq);
6243 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6246 else
6247 at_loop = loop;
6249 if (negative)
6250 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6252 if (load_lanes_p)
6253 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6254 else
6255 aggr_type = vectype;
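/* The data-ref pointer created below is bumped by TYPE_SIZE_UNIT
   (aggr_type) for each copy: for load-lanes this covers the whole
   interleaved group (an array of vec_num vectors), otherwise a single
   vector.  */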
6257 prev_stmt_info = NULL;
6258 for (j = 0; j < ncopies; j++)
6260 /* 1. Create the vector or array pointer update chain. */
6261 if (j == 0)
6263 bool simd_lane_access_p
6264 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
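/* For a simd-lane access to a known local array we can address the array
   directly, using its invariant base address plus a constant offset that
   is simply incremented for each copy (see dataref_offset below), instead
   of creating a pointer induction variable.  */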
6265 if (simd_lane_access_p
6266 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6267 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6268 && integer_zerop (DR_OFFSET (first_dr))
6269 && integer_zerop (DR_INIT (first_dr))
6270 && alias_sets_conflict_p (get_alias_set (aggr_type),
6271 get_alias_set (DR_REF (first_dr)))
6272 && (alignment_support_scheme == dr_aligned
6273 || alignment_support_scheme == dr_unaligned_supported))
6275 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6276 dataref_offset = build_int_cst (reference_alias_ptr_type
6277 (DR_REF (first_dr)), 0);
6278 inv_p = false;
6280 else
6281 dataref_ptr
6282 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6283 offset, &dummy, gsi, &ptr_incr,
6284 simd_lane_access_p, &inv_p);
6286 else if (dataref_offset)
6287 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6288 TYPE_SIZE_UNIT (aggr_type));
6289 else
6290 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6291 TYPE_SIZE_UNIT (aggr_type));
6293 if (grouped_load || slp_perm)
6294 dr_chain.create (vec_num);
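/* When the target provides load-lanes instructions (e.g. NEON
   vld2/vld3/vld4), a single internal LOAD_LANES call loads the whole
   interleaved group and de-interleaves it into an array of vectors, from
   which the individual vectors are extracted below.  Otherwise each
   vector is loaded separately and permuted afterwards.  */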
6296 if (load_lanes_p)
6298 tree vec_array;
6300 vec_array = create_vector_array (vectype, vec_num);
6302 /* Emit:
6303 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6304 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6305 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6306 gimple_call_set_lhs (new_stmt, vec_array);
6307 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6309 /* Extract each vector into an SSA_NAME. */
6310 for (i = 0; i < vec_num; i++)
6312 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6313 vec_array, i);
6314 dr_chain.quick_push (new_temp);
6317 /* Record the mapping between SSA_NAMEs and statements. */
6318 vect_record_grouped_load_vectors (stmt, dr_chain);
6320 else
6322 for (i = 0; i < vec_num; i++)
6324 if (i > 0)
6325 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6326 stmt, NULL_TREE);
6328 /* 2. Create the vector-load in the loop. */
6329 switch (alignment_support_scheme)
6331 case dr_aligned:
6332 case dr_unaligned_supported:
6334 unsigned int align, misalign;
6336 data_ref
6337 = build2 (MEM_REF, vectype, dataref_ptr,
6338 dataref_offset
6339 ? dataref_offset
6340 : build_int_cst (reference_alias_ptr_type
6341 (DR_REF (first_dr)), 0));
6342 align = TYPE_ALIGN_UNIT (vectype);
6343 if (alignment_support_scheme == dr_aligned)
6345 gcc_assert (aligned_access_p (first_dr));
6346 misalign = 0;
6348 else if (DR_MISALIGNMENT (first_dr) == -1)
6350 TREE_TYPE (data_ref)
6351 = build_aligned_type (TREE_TYPE (data_ref),
6352 TYPE_ALIGN (elem_type));
6353 align = TYPE_ALIGN_UNIT (elem_type);
6354 misalign = 0;
6356 else
6358 TREE_TYPE (data_ref)
6359 = build_aligned_type (TREE_TYPE (data_ref),
6360 TYPE_ALIGN (elem_type));
6361 misalign = DR_MISALIGNMENT (first_dr);
6363 if (dataref_offset == NULL_TREE)
6364 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6365 align, misalign);
6366 break;
6368 case dr_explicit_realign:
6370 tree ptr, bump;
6371 tree vs_minus_1;
6373 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6375 if (compute_in_loop)
6376 msq = vect_setup_realignment (first_stmt, gsi,
6377 &realignment_token,
6378 dr_explicit_realign,
6379 dataref_ptr, NULL);
6381 ptr = copy_ssa_name (dataref_ptr, NULL);
6382 new_stmt = gimple_build_assign_with_ops
6383 (BIT_AND_EXPR, ptr, dataref_ptr,
6384 build_int_cst
6385 (TREE_TYPE (dataref_ptr),
6386 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6387 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6388 data_ref
6389 = build2 (MEM_REF, vectype, ptr,
6390 build_int_cst (reference_alias_ptr_type
6391 (DR_REF (first_dr)), 0));
6392 vec_dest = vect_create_destination_var (scalar_dest,
6393 vectype);
6394 new_stmt = gimple_build_assign (vec_dest, data_ref);
6395 new_temp = make_ssa_name (vec_dest, new_stmt);
6396 gimple_assign_set_lhs (new_stmt, new_temp);
6397 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6398 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6399 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6400 msq = new_temp;
6402 bump = size_binop (MULT_EXPR, vs_minus_1,
6403 TYPE_SIZE_UNIT (elem_type));
6404 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6405 new_stmt = gimple_build_assign_with_ops
6406 (BIT_AND_EXPR, NULL_TREE, ptr,
6407 build_int_cst
6408 (TREE_TYPE (ptr),
6409 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6410 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6411 gimple_assign_set_lhs (new_stmt, ptr);
6412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6413 data_ref
6414 = build2 (MEM_REF, vectype, ptr,
6415 build_int_cst (reference_alias_ptr_type
6416 (DR_REF (first_dr)), 0));
6417 break;
6419 case dr_explicit_realign_optimized:
6420 new_temp = copy_ssa_name (dataref_ptr, NULL);
6421 new_stmt = gimple_build_assign_with_ops
6422 (BIT_AND_EXPR, new_temp, dataref_ptr,
6423 build_int_cst
6424 (TREE_TYPE (dataref_ptr),
6425 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6426 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6427 data_ref
6428 = build2 (MEM_REF, vectype, new_temp,
6429 build_int_cst (reference_alias_ptr_type
6430 (DR_REF (first_dr)), 0));
6431 break;
6432 default:
6433 gcc_unreachable ();
6435 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6436 new_stmt = gimple_build_assign (vec_dest, data_ref);
6437 new_temp = make_ssa_name (vec_dest, new_stmt);
6438 gimple_assign_set_lhs (new_stmt, new_temp);
6439 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6441 /* 3. Handle explicit realignment if necessary/supported.
6442 Create in loop:
6443 vec_dest = realign_load (msq, lsq, realignment_token) */
6444 if (alignment_support_scheme == dr_explicit_realign_optimized
6445 || alignment_support_scheme == dr_explicit_realign)
6447 lsq = gimple_assign_lhs (new_stmt);
6448 if (!realignment_token)
6449 realignment_token = dataref_ptr;
6450 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6451 new_stmt
6452 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6453 vec_dest, msq, lsq,
6454 realignment_token);
6455 new_temp = make_ssa_name (vec_dest, new_stmt);
6456 gimple_assign_set_lhs (new_stmt, new_temp);
6457 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6459 if (alignment_support_scheme == dr_explicit_realign_optimized)
6461 gcc_assert (phi);
6462 if (i == vec_num - 1 && j == ncopies - 1)
6463 add_phi_arg (phi, lsq,
6464 loop_latch_edge (containing_loop),
6465 UNKNOWN_LOCATION);
6466 msq = lsq;
6470 /* 4. Handle invariant-load. */
6471 if (inv_p && !bb_vinfo)
6473 gcc_assert (!grouped_load);
6474 /* If we have versioned for aliasing or the loop doesn't
6475 have any data dependencies that would preclude this,
6476 then we are sure this is a loop invariant load and
6477 thus we can insert it on the preheader edge. */
6478 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6479 && !nested_in_vect_loop
6480 && hoist_defs_of_uses (stmt, loop))
6482 if (dump_enabled_p ())
6484 dump_printf_loc (MSG_NOTE, vect_location,
6485 "hoisting out of the vectorized "
6486 "loop: ");
6487 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6488 dump_printf (MSG_NOTE, "\n");
6490 tree tem = copy_ssa_name (scalar_dest, NULL);
6491 gsi_insert_on_edge_immediate
6492 (loop_preheader_edge (loop),
6493 gimple_build_assign (tem,
6494 unshare_expr
6495 (gimple_assign_rhs1 (stmt))));
6496 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6498 else
6500 gimple_stmt_iterator gsi2 = *gsi;
6501 gsi_next (&gsi2);
6502 new_temp = vect_init_vector (stmt, scalar_dest,
6503 vectype, &gsi2);
6505 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6506 set_vinfo_for_stmt (new_stmt,
6507 new_stmt_vec_info (new_stmt, loop_vinfo,
6508 bb_vinfo));
6511 if (negative)
6513 tree perm_mask = perm_mask_for_reverse (vectype);
6514 new_temp = permute_vec_elements (new_temp, new_temp,
6515 perm_mask, stmt, gsi);
6516 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6519 /* Collect vector loads and later create their permutation in
6520 vect_transform_grouped_load (). */
6521 if (grouped_load || slp_perm)
6522 dr_chain.quick_push (new_temp);
6524 /* Store vector loads in the corresponding SLP_NODE. */
6525 if (slp && !slp_perm)
6526 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6528 /* Bump the vector pointer to account for a gap. */
6529 if (slp && group_gap != 0)
6531 tree bump = size_binop (MULT_EXPR,
6532 TYPE_SIZE_UNIT (elem_type),
6533 size_int (group_gap));
6534 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6535 stmt, bump);
6539 if (slp && !slp_perm)
6540 continue;
6542 if (slp_perm)
6544 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6545 slp_node_instance, false))
6547 dr_chain.release ();
6548 return false;
6551 else
6553 if (grouped_load)
6555 if (!load_lanes_p)
6556 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6557 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6559 else
6561 if (j == 0)
6562 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6563 else
6564 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6565 prev_stmt_info = vinfo_for_stmt (new_stmt);
6568 dr_chain.release ();
6571 return true;
6574 /* Function vect_is_simple_cond.
6576 Input:
6577 LOOP - the loop that is being vectorized.
6578 COND - Condition that is checked for simple use.
6580 Output:
6581 *COMP_VECTYPE - the vector type for the comparison.
6583 Returns whether a COND can be vectorized. Checks whether
6584 condition operands are supportable using vect_is_simple_use_1. */
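/* For example (illustration only), for a condition like a_1 < b_2 each
   SSA operand is checked with vect_is_simple_use_1; integer, real and
   fixed-point constants are also accepted.  *COMP_VECTYPE is taken from
   whichever operand supplies a vector type.  */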
6586 static bool
6587 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6588 bb_vec_info bb_vinfo, tree *comp_vectype)
6590 tree lhs, rhs;
6591 tree def;
6592 enum vect_def_type dt;
6593 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6595 if (!COMPARISON_CLASS_P (cond))
6596 return false;
6598 lhs = TREE_OPERAND (cond, 0);
6599 rhs = TREE_OPERAND (cond, 1);
6601 if (TREE_CODE (lhs) == SSA_NAME)
6603 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6604 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6605 &lhs_def_stmt, &def, &dt, &vectype1))
6606 return false;
6608 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6609 && TREE_CODE (lhs) != FIXED_CST)
6610 return false;
6612 if (TREE_CODE (rhs) == SSA_NAME)
6614 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6615 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6616 &rhs_def_stmt, &def, &dt, &vectype2))
6617 return false;
6619 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6620 && TREE_CODE (rhs) != FIXED_CST)
6621 return false;
6623 *comp_vectype = vectype1 ? vectype1 : vectype2;
6624 return true;
6627 /* vectorizable_condition.
6629 Check if STMT is conditional modify expression that can be vectorized.
6630 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6631 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6632 at GSI.
6634 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6635 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6636 else clause if it is 2).
6638 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6640 bool
6641 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6642 gimple *vec_stmt, tree reduc_def, int reduc_index,
6643 slp_tree slp_node)
6645 tree scalar_dest = NULL_TREE;
6646 tree vec_dest = NULL_TREE;
6647 tree cond_expr, then_clause, else_clause;
6648 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6649 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6650 tree comp_vectype = NULL_TREE;
6651 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6652 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6653 tree vec_compare, vec_cond_expr;
6654 tree new_temp;
6655 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6656 tree def;
6657 enum vect_def_type dt, dts[4];
6658 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6659 int ncopies;
6660 enum tree_code code;
6661 stmt_vec_info prev_stmt_info = NULL;
6662 int i, j;
6663 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6664 vec<tree> vec_oprnds0 = vNULL;
6665 vec<tree> vec_oprnds1 = vNULL;
6666 vec<tree> vec_oprnds2 = vNULL;
6667 vec<tree> vec_oprnds3 = vNULL;
6668 tree vec_cmp_type;
6670 if (slp_node || PURE_SLP_STMT (stmt_info))
6671 ncopies = 1;
6672 else
6673 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6675 gcc_assert (ncopies >= 1);
6676 if (reduc_index && ncopies > 1)
6677 return false; /* FORNOW */
6679 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6680 return false;
6682 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6683 return false;
6685 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6686 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6687 && reduc_def))
6688 return false;
6690 /* FORNOW: not yet supported. */
6691 if (STMT_VINFO_LIVE_P (stmt_info))
6693 if (dump_enabled_p ())
6694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6695 "value used after loop.\n");
6696 return false;
6699 /* Is vectorizable conditional operation? */
6700 if (!is_gimple_assign (stmt))
6701 return false;
6703 code = gimple_assign_rhs_code (stmt);
6705 if (code != COND_EXPR)
6706 return false;
6708 cond_expr = gimple_assign_rhs1 (stmt);
6709 then_clause = gimple_assign_rhs2 (stmt);
6710 else_clause = gimple_assign_rhs3 (stmt);
6712 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6713 &comp_vectype)
6714 || !comp_vectype)
6715 return false;
6717 if (TREE_CODE (then_clause) == SSA_NAME)
6719 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6720 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6721 &then_def_stmt, &def, &dt))
6722 return false;
6724 else if (TREE_CODE (then_clause) != INTEGER_CST
6725 && TREE_CODE (then_clause) != REAL_CST
6726 && TREE_CODE (then_clause) != FIXED_CST)
6727 return false;
6729 if (TREE_CODE (else_clause) == SSA_NAME)
6731 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6732 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6733 &else_def_stmt, &def, &dt))
6734 return false;
6736 else if (TREE_CODE (else_clause) != INTEGER_CST
6737 && TREE_CODE (else_clause) != REAL_CST
6738 && TREE_CODE (else_clause) != FIXED_CST)
6739 return false;
6741 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6742 /* The result of a vector comparison should have a signed integer type. */
6743 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6744 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
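/* For example, for a V4SF vectype the element precision is 32, so
   cmp_type is a signed 32-bit integer type and vec_cmp_type is the
   corresponding four-element signed integer vector type (e.g. V4SI).  */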
6745 if (vec_cmp_type == NULL_TREE)
6746 return false;
6748 if (!vec_stmt)
6750 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6751 return expand_vec_cond_expr_p (vectype, comp_vectype);
6754 /* Transform. */
6756 if (!slp_node)
6758 vec_oprnds0.create (1);
6759 vec_oprnds1.create (1);
6760 vec_oprnds2.create (1);
6761 vec_oprnds3.create (1);
6764 /* Handle def. */
6765 scalar_dest = gimple_assign_lhs (stmt);
6766 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6768 /* Handle cond expr. */
6769 for (j = 0; j < ncopies; j++)
6771 gimple new_stmt = NULL;
6772 if (j == 0)
6774 if (slp_node)
6776 auto_vec<tree, 4> ops;
6777 auto_vec<vec<tree>, 4> vec_defs;
6779 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6780 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6781 ops.safe_push (then_clause);
6782 ops.safe_push (else_clause);
6783 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6784 vec_oprnds3 = vec_defs.pop ();
6785 vec_oprnds2 = vec_defs.pop ();
6786 vec_oprnds1 = vec_defs.pop ();
6787 vec_oprnds0 = vec_defs.pop ();
6789 ops.release ();
6790 vec_defs.release ();
6792 else
6794 gimple gtemp;
6795 vec_cond_lhs =
6796 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6797 stmt, NULL);
6798 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6799 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6801 vec_cond_rhs =
6802 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6803 stmt, NULL);
6804 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6805 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6806 if (reduc_index == 1)
6807 vec_then_clause = reduc_def;
6808 else
6810 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6811 stmt, NULL);
6812 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6813 NULL, &gtemp, &def, &dts[2]);
6815 if (reduc_index == 2)
6816 vec_else_clause = reduc_def;
6817 else
6819 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6820 stmt, NULL);
6821 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6822 NULL, &gtemp, &def, &dts[3]);
6826 else
6828 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6829 vec_oprnds0.pop ());
6830 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6831 vec_oprnds1.pop ());
6832 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6833 vec_oprnds2.pop ());
6834 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6835 vec_oprnds3.pop ());
6838 if (!slp_node)
6840 vec_oprnds0.quick_push (vec_cond_lhs);
6841 vec_oprnds1.quick_push (vec_cond_rhs);
6842 vec_oprnds2.quick_push (vec_then_clause);
6843 vec_oprnds3.quick_push (vec_else_clause);
6846 /* Arguments are ready. Create the new vector stmt. */
6847 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6849 vec_cond_rhs = vec_oprnds1[i];
6850 vec_then_clause = vec_oprnds2[i];
6851 vec_else_clause = vec_oprnds3[i];
6853 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6854 vec_cond_lhs, vec_cond_rhs);
6855 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6856 vec_compare, vec_then_clause, vec_else_clause);
6858 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6859 new_temp = make_ssa_name (vec_dest, new_stmt);
6860 gimple_assign_set_lhs (new_stmt, new_temp);
6861 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6862 if (slp_node)
6863 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6866 if (slp_node)
6867 continue;
6869 if (j == 0)
6870 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6871 else
6872 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6874 prev_stmt_info = vinfo_for_stmt (new_stmt);
6877 vec_oprnds0.release ();
6878 vec_oprnds1.release ();
6879 vec_oprnds2.release ();
6880 vec_oprnds3.release ();
6882 return true;
6886 /* Make sure the statement is vectorizable. */
6888 bool
6889 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6891 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6892 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6893 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6894 bool ok;
6895 tree scalar_type, vectype;
6896 gimple pattern_stmt;
6897 gimple_seq pattern_def_seq;
6899 if (dump_enabled_p ())
6901 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6902 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6903 dump_printf (MSG_NOTE, "\n");
6906 if (gimple_has_volatile_ops (stmt))
6908 if (dump_enabled_p ())
6909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6910 "not vectorized: stmt has volatile operands\n");
6912 return false;
6915 /* Skip stmts that do not need to be vectorized. In loops this is expected
6916 to include:
6917 - the COND_EXPR which is the loop exit condition
6918 - any LABEL_EXPRs in the loop
6919 - computations that are used only for array indexing or loop control.
6920 In basic blocks we only analyze statements that are a part of some SLP
6921 instance, therefore, all the statements are relevant.
6923 Pattern statement needs to be analyzed instead of the original statement
6924 if the original statement is not relevant. Otherwise, we analyze both
6925 statements. In basic blocks we are called from some SLP instance
6926 traversal; don't analyze pattern stmts instead, since the pattern stmts
6927 will already be part of the SLP instance. */
6929 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6930 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6931 && !STMT_VINFO_LIVE_P (stmt_info))
6933 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6934 && pattern_stmt
6935 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6936 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6938 /* Analyze PATTERN_STMT instead of the original stmt. */
6939 stmt = pattern_stmt;
6940 stmt_info = vinfo_for_stmt (pattern_stmt);
6941 if (dump_enabled_p ())
6943 dump_printf_loc (MSG_NOTE, vect_location,
6944 "==> examining pattern statement: ");
6945 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6946 dump_printf (MSG_NOTE, "\n");
6949 else
6951 if (dump_enabled_p ())
6952 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6954 return true;
6957 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6958 && node == NULL
6959 && pattern_stmt
6960 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6961 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6963 /* Analyze PATTERN_STMT too. */
6964 if (dump_enabled_p ())
6966 dump_printf_loc (MSG_NOTE, vect_location,
6967 "==> examining pattern statement: ");
6968 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6969 dump_printf (MSG_NOTE, "\n");
6972 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
6973 return false;
6976 if (is_pattern_stmt_p (stmt_info)
6977 && node == NULL
6978 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
6980 gimple_stmt_iterator si;
6982 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
6984 gimple pattern_def_stmt = gsi_stmt (si);
6985 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
6986 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
6988 /* Analyze def stmt of STMT if it's a pattern stmt. */
6989 if (dump_enabled_p ())
6991 dump_printf_loc (MSG_NOTE, vect_location,
6992 "==> examining pattern def statement: ");
6993 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
6994 dump_printf (MSG_NOTE, "\n");
6997 if (!vect_analyze_stmt (pattern_def_stmt,
6998 need_to_vectorize, node))
6999 return false;
7004 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7006 case vect_internal_def:
7007 break;
7009 case vect_reduction_def:
7010 case vect_nested_cycle:
7011 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7012 || relevance == vect_used_in_outer_by_reduction
7013 || relevance == vect_unused_in_scope));
7014 break;
7016 case vect_induction_def:
7017 case vect_constant_def:
7018 case vect_external_def:
7019 case vect_unknown_def_type:
7020 default:
7021 gcc_unreachable ();
7024 if (bb_vinfo)
7026 gcc_assert (PURE_SLP_STMT (stmt_info));
7028 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7029 if (dump_enabled_p ())
7031 dump_printf_loc (MSG_NOTE, vect_location,
7032 "get vectype for scalar type: ");
7033 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7034 dump_printf (MSG_NOTE, "\n");
7037 vectype = get_vectype_for_scalar_type (scalar_type);
7038 if (!vectype)
7040 if (dump_enabled_p ())
7042 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7043 "not SLPed: unsupported data-type ");
7044 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7045 scalar_type);
7046 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7048 return false;
7051 if (dump_enabled_p ())
7053 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7054 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7055 dump_printf (MSG_NOTE, "\n");
7058 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7061 if (STMT_VINFO_RELEVANT_P (stmt_info))
7063 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7064 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7065 || (is_gimple_call (stmt)
7066 && gimple_call_lhs (stmt) == NULL_TREE));
7067 *need_to_vectorize = true;
7070 ok = true;
7071 if (!bb_vinfo
7072 && (STMT_VINFO_RELEVANT_P (stmt_info)
7073 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7074 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7075 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7076 || vectorizable_shift (stmt, NULL, NULL, NULL)
7077 || vectorizable_operation (stmt, NULL, NULL, NULL)
7078 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7079 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7080 || vectorizable_call (stmt, NULL, NULL, NULL)
7081 || vectorizable_store (stmt, NULL, NULL, NULL)
7082 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7083 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7084 else
7086 if (bb_vinfo)
7087 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7088 || vectorizable_conversion (stmt, NULL, NULL, node)
7089 || vectorizable_shift (stmt, NULL, NULL, node)
7090 || vectorizable_operation (stmt, NULL, NULL, node)
7091 || vectorizable_assignment (stmt, NULL, NULL, node)
7092 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7093 || vectorizable_call (stmt, NULL, NULL, node)
7094 || vectorizable_store (stmt, NULL, NULL, node)
7095 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7098 if (!ok)
7100 if (dump_enabled_p ())
7102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7103 "not vectorized: relevant stmt not ");
7104 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7105 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7106 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7109 return false;
7112 if (bb_vinfo)
7113 return true;
7115 /* Stmts that are (also) "live" (i.e. - that are used outside the loop)
7116 need extra handling, except for vectorizable reductions. */
7117 if (STMT_VINFO_LIVE_P (stmt_info)
7118 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7119 ok = vectorizable_live_operation (stmt, NULL, NULL);
7121 if (!ok)
7123 if (dump_enabled_p ())
7125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7126 "not vectorized: live stmt not ");
7127 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7128 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7129 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7132 return false;
7135 return true;
7139 /* Function vect_transform_stmt.
7141 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7143 bool
7144 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7145 bool *grouped_store, slp_tree slp_node,
7146 slp_instance slp_node_instance)
7148 bool is_store = false;
7149 gimple vec_stmt = NULL;
7150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7151 bool done;
7153 switch (STMT_VINFO_TYPE (stmt_info))
7155 case type_demotion_vec_info_type:
7156 case type_promotion_vec_info_type:
7157 case type_conversion_vec_info_type:
7158 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7159 gcc_assert (done);
7160 break;
7162 case induc_vec_info_type:
7163 gcc_assert (!slp_node);
7164 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7165 gcc_assert (done);
7166 break;
7168 case shift_vec_info_type:
7169 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7170 gcc_assert (done);
7171 break;
7173 case op_vec_info_type:
7174 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7175 gcc_assert (done);
7176 break;
7178 case assignment_vec_info_type:
7179 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7180 gcc_assert (done);
7181 break;
7183 case load_vec_info_type:
7184 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7185 slp_node_instance);
7186 gcc_assert (done);
7187 break;
7189 case store_vec_info_type:
7190 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7191 gcc_assert (done);
7192 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7194 /* In case of interleaving, the whole chain is vectorized when the
7195 last store in the chain is reached. Store stmts before the last
7196 one are skipped, and their stmt_vec_info shouldn't be freed
7197 meanwhile. */
7198 *grouped_store = true;
7199 if (STMT_VINFO_VEC_STMT (stmt_info))
7200 is_store = true;
7202 else
7203 is_store = true;
7204 break;
7206 case condition_vec_info_type:
7207 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7208 gcc_assert (done);
7209 break;
7211 case call_vec_info_type:
7212 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7213 stmt = gsi_stmt (*gsi);
7214 if (is_gimple_call (stmt)
7215 && gimple_call_internal_p (stmt)
7216 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7217 is_store = true;
7218 break;
7220 case call_simd_clone_vec_info_type:
7221 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7222 stmt = gsi_stmt (*gsi);
7223 break;
7225 case reduc_vec_info_type:
7226 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7227 gcc_assert (done);
7228 break;
7230 default:
7231 if (!STMT_VINFO_LIVE_P (stmt_info))
7233 if (dump_enabled_p ())
7234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7235 "stmt not supported.\n");
7236 gcc_unreachable ();
7240 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7241 is being vectorized, but outside the immediately enclosing loop. */
7242 if (vec_stmt
7243 && STMT_VINFO_LOOP_VINFO (stmt_info)
7244 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7245 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7246 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7247 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7248 || STMT_VINFO_RELEVANT (stmt_info) ==
7249 vect_used_in_outer_by_reduction))
7251 struct loop *innerloop = LOOP_VINFO_LOOP (
7252 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7253 imm_use_iterator imm_iter;
7254 use_operand_p use_p;
7255 tree scalar_dest;
7256 gimple exit_phi;
7258 if (dump_enabled_p ())
7259 dump_printf_loc (MSG_NOTE, vect_location,
7260 "Record the vdef for outer-loop vectorization.\n");
7262 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7263 (to be used when vectorizing outer-loop stmts that use the DEF of
7264 STMT). */
7265 if (gimple_code (stmt) == GIMPLE_PHI)
7266 scalar_dest = PHI_RESULT (stmt);
7267 else
7268 scalar_dest = gimple_assign_lhs (stmt);
7270 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7272 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7274 exit_phi = USE_STMT (use_p);
7275 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7280 /* Handle stmts whose DEF is used outside the loop-nest that is
7281 being vectorized. */
7282 if (STMT_VINFO_LIVE_P (stmt_info)
7283 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7285 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7286 gcc_assert (done);
7289 if (vec_stmt)
7290 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7292 return is_store;
7296 /* Remove a group of stores (for SLP or interleaving), free their
7297 stmt_vec_info. */
7299 void
7300 vect_remove_stores (gimple first_stmt)
7302 gimple next = first_stmt;
7303 gimple tmp;
7304 gimple_stmt_iterator next_si;
7306 while (next)
7308 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7310 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7311 if (is_pattern_stmt_p (stmt_info))
7312 next = STMT_VINFO_RELATED_STMT (stmt_info);
7313 /* Free the attached stmt_vec_info and remove the stmt. */
7314 next_si = gsi_for_stmt (next);
7315 unlink_stmt_vdef (next);
7316 gsi_remove (&next_si, true);
7317 release_defs (next);
7318 free_stmt_vec_info (next);
7319 next = tmp;
7324 /* Function new_stmt_vec_info.
7326 Create and initialize a new stmt_vec_info struct for STMT. */
7328 stmt_vec_info
7329 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7330 bb_vec_info bb_vinfo)
7332 stmt_vec_info res;
7333 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7335 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7336 STMT_VINFO_STMT (res) = stmt;
7337 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7338 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7339 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7340 STMT_VINFO_LIVE_P (res) = false;
7341 STMT_VINFO_VECTYPE (res) = NULL;
7342 STMT_VINFO_VEC_STMT (res) = NULL;
7343 STMT_VINFO_VECTORIZABLE (res) = true;
7344 STMT_VINFO_IN_PATTERN_P (res) = false;
7345 STMT_VINFO_RELATED_STMT (res) = NULL;
7346 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7347 STMT_VINFO_DATA_REF (res) = NULL;
7349 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7350 STMT_VINFO_DR_OFFSET (res) = NULL;
7351 STMT_VINFO_DR_INIT (res) = NULL;
7352 STMT_VINFO_DR_STEP (res) = NULL;
7353 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7355 if (gimple_code (stmt) == GIMPLE_PHI
7356 && is_loop_header_bb_p (gimple_bb (stmt)))
7357 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7358 else
7359 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7361 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7362 STMT_SLP_TYPE (res) = loop_vect;
7363 GROUP_FIRST_ELEMENT (res) = NULL;
7364 GROUP_NEXT_ELEMENT (res) = NULL;
7365 GROUP_SIZE (res) = 0;
7366 GROUP_STORE_COUNT (res) = 0;
7367 GROUP_GAP (res) = 0;
7368 GROUP_SAME_DR_STMT (res) = NULL;
7370 return res;
7374 /* Create the global vector used to hold stmt_vec_info structs. */
7376 void
7377 init_stmt_vec_info_vec (void)
7379 gcc_assert (!stmt_vec_info_vec.exists ());
7380 stmt_vec_info_vec.create (50);
7384 /* Free the global vector of stmt_vec_info structs. */
7386 void
7387 free_stmt_vec_info_vec (void)
7389 unsigned int i;
7390 vec_void_p info;
7391 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7392 if (info != NULL)
7393 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7394 gcc_assert (stmt_vec_info_vec.exists ());
7395 stmt_vec_info_vec.release ();
7399 /* Free stmt vectorization related info. */
7401 void
7402 free_stmt_vec_info (gimple stmt)
7404 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7406 if (!stmt_info)
7407 return;
7409 /* Check if this statement has a related "pattern stmt"
7410 (introduced by the vectorizer during the pattern recognition
7411 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7412 too. */
7413 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7415 stmt_vec_info patt_info
7416 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7417 if (patt_info)
7419 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7420 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7421 gimple_set_bb (patt_stmt, NULL);
7422 tree lhs = gimple_get_lhs (patt_stmt);
7423 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7424 release_ssa_name (lhs);
7425 if (seq)
7427 gimple_stmt_iterator si;
7428 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7430 gimple seq_stmt = gsi_stmt (si);
7431 gimple_set_bb (seq_stmt, NULL);
7432 lhs = gimple_get_lhs (seq_stmt);
7433 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7434 release_ssa_name (lhs);
7435 free_stmt_vec_info (seq_stmt);
7438 free_stmt_vec_info (patt_stmt);
7442 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7443 set_vinfo_for_stmt (stmt, NULL);
7444 free (stmt_info);
7448 /* Function get_vectype_for_scalar_type_and_size.
7450 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7451 by the target. */
7453 static tree
7454 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7456 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
7457 enum machine_mode simd_mode;
7458 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7459 int nunits;
7460 tree vectype;
7462 if (nbytes == 0)
7463 return NULL_TREE;
7465 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7466 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7467 return NULL_TREE;
7469 /* For vector types of elements whose mode precision doesn't
7470 match their type's precision we use an element type of mode
7471 precision. The vectorization routines will have to make sure
7472 they support the proper result truncation/extension.
7473 We also make sure to build vector types with INTEGER_TYPE
7474 component type only. */
7475 if (INTEGRAL_TYPE_P (scalar_type)
7476 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7477 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7478 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7479 TYPE_UNSIGNED (scalar_type));
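/* For example, a C bool has QImode but precision 1, so it is replaced
   here by an 8-bit unsigned integer type before the vector type is
   built.  */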
7481 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7482 When the component mode passes the above test simply use a type
7483 corresponding to that mode. The theory is that any use that
7484 would cause problems with this will disable vectorization anyway. */
7485 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7486 && !INTEGRAL_TYPE_P (scalar_type))
7487 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7489 /* We can't build a vector type of elements with alignment bigger than
7490 their size. */
7491 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7492 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7493 TYPE_UNSIGNED (scalar_type));
7495 /* If we fell back to using the mode, fail if there was
7496 no scalar type for it. */
7497 if (scalar_type == NULL_TREE)
7498 return NULL_TREE;
7500 /* If no size was supplied use the mode the target prefers. Otherwise
7501 lookup a vector mode of the specified size. */
7502 if (size == 0)
7503 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7504 else
7505 simd_mode = mode_for_vector (inner_mode, size / nbytes);
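/* For example, with a 4-byte SImode element and SIZE == 16 this asks for
   a 16-byte vector mode, giving nunits == 4 and a 4-element integer
   vector type below.  */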
7506 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7507 if (nunits <= 1)
7508 return NULL_TREE;
7510 vectype = build_vector_type (scalar_type, nunits);
7512 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7513 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7514 return NULL_TREE;
7516 return vectype;
7519 unsigned int current_vector_size;
7521 /* Function get_vectype_for_scalar_type.
7523 Returns the vector type corresponding to SCALAR_TYPE as supported
7524 by the target. */
7526 tree
7527 get_vectype_for_scalar_type (tree scalar_type)
7529 tree vectype;
7530 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7531 current_vector_size);
7532 if (vectype
7533 && current_vector_size == 0)
7534 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7535 return vectype;
7538 /* Function get_same_sized_vectype
7540 Returns a vector type corresponding to SCALAR_TYPE with the same size
7541 as VECTOR_TYPE, if supported by the target. */
7543 tree
7544 get_same_sized_vectype (tree scalar_type, tree vector_type)
7546 return get_vectype_for_scalar_type_and_size
7547 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7550 /* Function vect_is_simple_use.
7552 Input:
7553 LOOP_VINFO - the vect info of the loop that is being vectorized.
7554 BB_VINFO - the vect info of the basic block that is being vectorized.
7555 OPERAND - operand of STMT in the loop or bb.
7556 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7558 Returns whether a stmt with OPERAND can be vectorized.
7559 For loops, supportable operands are constants, loop invariants, and operands
7560 that are defined by the current iteration of the loop. Unsupportable
7561 operands are those that are defined by a previous iteration of the loop (as
7562 is the case in reduction/induction computations).
7563 For basic blocks, supportable operands are constants and bb invariants.
7564 For now, operands defined outside the basic block are not supported. */
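/* Typical call pattern (a sketch; USE_OPERAND and USE_STMT stand for the
   operand and the statement being analyzed and are hypothetical names):

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (USE_OPERAND, USE_STMT, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt))
       return false;

   On success DT classifies the definition (constant, external, internal,
   reduction, ...), DEF_STMT points to the defining statement when the
   operand is an SSA_NAME, and DEF holds the defined value.  */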
7566 bool
7567 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7568 bb_vec_info bb_vinfo, gimple *def_stmt,
7569 tree *def, enum vect_def_type *dt)
7571 basic_block bb;
7572 stmt_vec_info stmt_vinfo;
7573 struct loop *loop = NULL;
7575 if (loop_vinfo)
7576 loop = LOOP_VINFO_LOOP (loop_vinfo);
7578 *def_stmt = NULL;
7579 *def = NULL_TREE;
7581 if (dump_enabled_p ())
7583 dump_printf_loc (MSG_NOTE, vect_location,
7584 "vect_is_simple_use: operand ");
7585 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7586 dump_printf (MSG_NOTE, "\n");
7589 if (CONSTANT_CLASS_P (operand))
7591 *dt = vect_constant_def;
7592 return true;
7595 if (is_gimple_min_invariant (operand))
7597 *def = operand;
7598 *dt = vect_external_def;
7599 return true;
7602 if (TREE_CODE (operand) == PAREN_EXPR)
7604 if (dump_enabled_p ())
7605 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7606 operand = TREE_OPERAND (operand, 0);
7609 if (TREE_CODE (operand) != SSA_NAME)
7611 if (dump_enabled_p ())
7612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7613 "not ssa-name.\n");
7614 return false;
7617 *def_stmt = SSA_NAME_DEF_STMT (operand);
7618 if (*def_stmt == NULL)
7620 if (dump_enabled_p ())
7621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7622 "no def_stmt.\n");
7623 return false;
7626 if (dump_enabled_p ())
7628 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7629 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7630 dump_printf (MSG_NOTE, "\n");
7633 /* An empty stmt is expected only in the case of a function argument
7634 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7635 if (gimple_nop_p (*def_stmt))
7637 *def = operand;
7638 *dt = vect_external_def;
7639 return true;
7642 bb = gimple_bb (*def_stmt);
7644 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7645 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7646 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7647 *dt = vect_external_def;
7648 else
7650 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7651 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7654 if (*dt == vect_unknown_def_type
7655 || (stmt
7656 && *dt == vect_double_reduction_def
7657 && gimple_code (stmt) != GIMPLE_PHI))
7659 if (dump_enabled_p ())
7660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7661 "Unsupported pattern.\n");
7662 return false;
7665 if (dump_enabled_p ())
7666 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7668 switch (gimple_code (*def_stmt))
7670 case GIMPLE_PHI:
7671 *def = gimple_phi_result (*def_stmt);
7672 break;
7674 case GIMPLE_ASSIGN:
7675 *def = gimple_assign_lhs (*def_stmt);
7676 break;
7678 case GIMPLE_CALL:
7679 *def = gimple_call_lhs (*def_stmt);
7680 if (*def != NULL)
7681 break;
7682 /* FALLTHRU */
7683 default:
7684 if (dump_enabled_p ())
7685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7686 "unsupported defining stmt:\n");
7687 return false;
7690 return true;
7693 /* Function vect_is_simple_use_1.
7695 Same as vect_is_simple_use but also determines the vector operand
7696 type of OPERAND and stores it to *VECTYPE. If the definition of
7697 OPERAND is vect_uninitialized_def, vect_constant_def or
7698 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7699 is responsible for computing the best suited vector type for the
7700 scalar operand. */
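/* Usage sketch (hypothetical caller code; OP and STMT are the operand and
   its use statement):

     gimple def_stmt;
     tree def, vectype;
     enum vect_def_type dt;

     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt, &vectype))
       return false;
     if (vectype == NULL_TREE)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

   Falling back to get_vectype_for_scalar_type is only one possible choice;
   as noted above, the best suited vector type for a constant or external
   operand is up to the caller.  */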
7702 bool
7703 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7704 bb_vec_info bb_vinfo, gimple *def_stmt,
7705 tree *def, enum vect_def_type *dt, tree *vectype)
7707 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7708 def, dt))
7709 return false;
7711 /* Now get a vector type if the def is internal, otherwise supply
7712 NULL_TREE and leave it up to the caller to figure out a proper
7713 type for the use stmt. */
7714 if (*dt == vect_internal_def
7715 || *dt == vect_induction_def
7716 || *dt == vect_reduction_def
7717 || *dt == vect_double_reduction_def
7718 || *dt == vect_nested_cycle)
7720 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7722 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7723 && !STMT_VINFO_RELEVANT (stmt_info)
7724 && !STMT_VINFO_LIVE_P (stmt_info))
7725 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7727 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7728 gcc_assert (*vectype != NULL_TREE);
7730 else if (*dt == vect_uninitialized_def
7731 || *dt == vect_constant_def
7732 || *dt == vect_external_def)
7733 *vectype = NULL_TREE;
7734 else
7735 gcc_unreachable ();
7737 return true;
7741 /* Function supportable_widening_operation
7743 Check whether an operation represented by the code CODE is a
7744 widening operation that is supported by the target platform in
7745 vector form (i.e., when operating on arguments of type VECTYPE_IN
7746 producing a result of type VECTYPE_OUT).
7748 Widening operations we currently support are NOP (CONVERT), FLOAT,
7749 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
7750 are supported by the target platform either directly (via vector
7751 tree-codes), or via target builtins.
7753 Output:
7754 - CODE1 and CODE2 are codes of vector operations to be used when
7755 vectorizing the operation, if available.
7756 - MULTI_STEP_CVT determines the number of required intermediate steps in
7757 case of multi-step conversion (like char->short->int - in that case
7758 MULTI_STEP_CVT will be 1).
7759 - INTERM_TYPES contains the intermediate type required to perform the
7760 widening operation (short in the above example). */
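/* Worked example (illustrative, assuming the target provides the vec_unpack
   optabs for both steps): widening a conversion from a vector(16) char
   operand (VECTYPE_IN) to vector(4) int (VECTYPE_OUT) cannot be done in one
   step, since unpacking V16QImode yields V8HImode, not V4SImode.  The
   multi-step loop inside the function then records vector(8) short in
   INTERM_TYPES and sets *MULTI_STEP_CVT to 1, with *CODE1/*CODE2 left as
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (possibly swapped on big-endian
   targets).  */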
7762 bool
7763 supportable_widening_operation (enum tree_code code, gimple stmt,
7764 tree vectype_out, tree vectype_in,
7765 enum tree_code *code1, enum tree_code *code2,
7766 int *multi_step_cvt,
7767 vec<tree> *interm_types)
7769 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7770 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7771 struct loop *vect_loop = NULL;
7772 enum machine_mode vec_mode;
7773 enum insn_code icode1, icode2;
7774 optab optab1, optab2;
7775 tree vectype = vectype_in;
7776 tree wide_vectype = vectype_out;
7777 enum tree_code c1, c2;
7778 int i;
7779 tree prev_type, intermediate_type;
7780 enum machine_mode intermediate_mode, prev_mode;
7781 optab optab3, optab4;
7783 *multi_step_cvt = 0;
7784 if (loop_info)
7785 vect_loop = LOOP_VINFO_LOOP (loop_info);
7787 switch (code)
7789 case WIDEN_MULT_EXPR:
7790 /* The result of a vectorized widening operation usually requires
7791 two vectors (because the widened results do not fit into one vector).
7792 The generated vector results would normally be expected to be
7793 generated in the same order as in the original scalar computation,
7794 i.e. if 8 results are generated in each vector iteration, they are
7795 to be organized as follows:
7796 vect1: [res1,res2,res3,res4],
7797 vect2: [res5,res6,res7,res8].
7799 However, in the special case that the result of the widening
7800 operation is used in a reduction computation only, the order doesn't
7801 matter (because when vectorizing a reduction we change the order of
7802 the computation). Some targets can take advantage of this and
7803 generate more efficient code. For example, targets like Altivec,
7804 which support widen_mult using a sequence of {mult_even,mult_odd},
7805 generate the following vectors:
7806 vect1: [res1,res3,res5,res7],
7807 vect2: [res2,res4,res6,res8].
7809 When vectorizing outer-loops, we execute the inner-loop sequentially
7810 (each vectorized inner-loop iteration contributes to VF outer-loop
7811 iterations in parallel). We therefore don't allow changing the
7812 order of the computation in the inner-loop during outer-loop
7813 vectorization. */
7814 /* TODO: Another case in which order doesn't *really* matter is when we
7815 widen and then contract again, e.g. (short)((int)x * y >> 8).
7816 Normally, pack_trunc performs an even/odd permute, whereas the
7817 repack from an even/odd expansion would be an interleave, which
7818 would be significantly simpler for e.g. AVX2. */
7819 /* In any case, in order to avoid duplicating the code below, recurse
7820 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7821 are properly set up for the caller. If we fail, we'll continue with
7822 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7823 if (vect_loop
7824 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7825 && !nested_in_vect_loop_p (vect_loop, stmt)
7826 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7827 stmt, vectype_out, vectype_in,
7828 code1, code2, multi_step_cvt,
7829 interm_types))
7830 return true;
7831 c1 = VEC_WIDEN_MULT_LO_EXPR;
7832 c2 = VEC_WIDEN_MULT_HI_EXPR;
7833 break;
7835 case VEC_WIDEN_MULT_EVEN_EXPR:
7836 /* Support the recursion induced just above. */
7837 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7838 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7839 break;
7841 case WIDEN_LSHIFT_EXPR:
7842 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7843 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7844 break;
7846 CASE_CONVERT:
7847 c1 = VEC_UNPACK_LO_EXPR;
7848 c2 = VEC_UNPACK_HI_EXPR;
7849 break;
7851 case FLOAT_EXPR:
7852 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7853 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7854 break;
7856 case FIX_TRUNC_EXPR:
7857 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7858 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7859 computing the operation. */
7860 return false;
7862 default:
7863 gcc_unreachable ();
7866 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7868 enum tree_code ctmp = c1;
7869 c1 = c2;
7870 c2 = ctmp;
7873 if (code == FIX_TRUNC_EXPR)
7875 /* The signedness is determined from the output operand. */
7876 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7877 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7879 else
7881 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7882 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7885 if (!optab1 || !optab2)
7886 return false;
7888 vec_mode = TYPE_MODE (vectype);
7889 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7890 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7891 return false;
7893 *code1 = c1;
7894 *code2 = c2;
7896 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7897 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7898 return true;
7900 /* Check if it's a multi-step conversion that can be done using intermediate
7901 types. */
7903 prev_type = vectype;
7904 prev_mode = vec_mode;
7906 if (!CONVERT_EXPR_CODE_P (code))
7907 return false;
7909 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7910 intermediate steps in the promotion sequence. We try
7911 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7912 not. */
7913 interm_types->create (MAX_INTERM_CVT_STEPS);
7914 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7916 intermediate_mode = insn_data[icode1].operand[0].mode;
7917 intermediate_type
7918 = lang_hooks.types.type_for_mode (intermediate_mode,
7919 TYPE_UNSIGNED (prev_type));
7920 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7921 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7923 if (!optab3 || !optab4
7924 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7925 || insn_data[icode1].operand[0].mode != intermediate_mode
7926 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7927 || insn_data[icode2].operand[0].mode != intermediate_mode
7928 || ((icode1 = optab_handler (optab3, intermediate_mode))
7929 == CODE_FOR_nothing)
7930 || ((icode2 = optab_handler (optab4, intermediate_mode))
7931 == CODE_FOR_nothing))
7932 break;
7934 interm_types->quick_push (intermediate_type);
7935 (*multi_step_cvt)++;
7937 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7938 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7939 return true;
7941 prev_type = intermediate_type;
7942 prev_mode = intermediate_mode;
7945 interm_types->release ();
7946 return false;
7950 /* Function supportable_narrowing_operation
7952 Check whether an operation represented by the code CODE is a
7953 narrowing operation that is supported by the target platform in
7954 vector form (i.e., when operating on arguments of type VECTYPE_IN
7955 and producing a result of type VECTYPE_OUT).
7957 Narrowing operations we currently support are NOP (CONVERT) and
7958 FIX_TRUNC. This function checks if these operations are supported by
7959 the target platform directly via vector tree-codes.
7961 Output:
7962 - CODE1 is the code of a vector operation to be used when
7963 vectorizing the operation, if available.
7964 - MULTI_STEP_CVT determines the number of required intermediate steps in
7965 case of multi-step conversion (like int->short->char - in that case
7966 MULTI_STEP_CVT will be 1).
7967 - INTERM_TYPES contains the intermediate type required to perform the
7968 narrowing operation (short in the above example). */
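/* Worked example (illustrative, assuming the target provides the
   vec_pack_trunc optabs involved): narrowing a conversion from
   vector(4) int (VECTYPE_IN) to vector(16) char (VECTYPE_OUT) cannot be
   done with a single VEC_PACK_TRUNC_EXPR, since packing V4SImode yields
   V8HImode.  The multi-step loop inside the function then records
   vector(8) short in INTERM_TYPES and sets *MULTI_STEP_CVT to 1.  */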
7970 bool
7971 supportable_narrowing_operation (enum tree_code code,
7972 tree vectype_out, tree vectype_in,
7973 enum tree_code *code1, int *multi_step_cvt,
7974 vec<tree> *interm_types)
7976 enum machine_mode vec_mode;
7977 enum insn_code icode1;
7978 optab optab1, interm_optab;
7979 tree vectype = vectype_in;
7980 tree narrow_vectype = vectype_out;
7981 enum tree_code c1;
7982 tree intermediate_type;
7983 enum machine_mode intermediate_mode, prev_mode;
7984 int i;
7985 bool uns;
7987 *multi_step_cvt = 0;
7988 switch (code)
7990 CASE_CONVERT:
7991 c1 = VEC_PACK_TRUNC_EXPR;
7992 break;
7994 case FIX_TRUNC_EXPR:
7995 c1 = VEC_PACK_FIX_TRUNC_EXPR;
7996 break;
7998 case FLOAT_EXPR:
7999 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8000 tree code and optabs used for computing the operation. */
8001 return false;
8003 default:
8004 gcc_unreachable ();
8007 if (code == FIX_TRUNC_EXPR)
8008 /* The signedness is determined from the output operand. */
8009 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8010 else
8011 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8013 if (!optab1)
8014 return false;
8016 vec_mode = TYPE_MODE (vectype);
8017 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8018 return false;
8020 *code1 = c1;
8022 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8023 return true;
8025 /* Check if it's a multi-step conversion that can be done using intermediate
8026 types. */
8027 prev_mode = vec_mode;
8028 if (code == FIX_TRUNC_EXPR)
8029 uns = TYPE_UNSIGNED (vectype_out);
8030 else
8031 uns = TYPE_UNSIGNED (vectype);
8033 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8034 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8035 costly than signed. */
8036 if (code == FIX_TRUNC_EXPR && uns)
8038 enum insn_code icode2;
8040 intermediate_type
8041 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8042 interm_optab
8043 = optab_for_tree_code (c1, intermediate_type, optab_default);
8044 if (interm_optab != unknown_optab
8045 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8046 && insn_data[icode1].operand[0].mode
8047 == insn_data[icode2].operand[0].mode)
8049 uns = false;
8050 optab1 = interm_optab;
8051 icode1 = icode2;
8055 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8056 intermediate steps in the narrowing sequence. We try
8057 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8058 interm_types->create (MAX_INTERM_CVT_STEPS);
8059 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8061 intermediate_mode = insn_data[icode1].operand[0].mode;
8062 intermediate_type
8063 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8064 interm_optab
8065 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8066 optab_default);
8067 if (!interm_optab
8068 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8069 || insn_data[icode1].operand[0].mode != intermediate_mode
8070 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8071 == CODE_FOR_nothing))
8072 break;
8074 interm_types->quick_push (intermediate_type);
8075 (*multi_step_cvt)++;
8077 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8078 return true;
8080 prev_mode = intermediate_mode;
8081 optab1 = interm_optab;
8084 interm_types->release ();
8085 return false;