1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "recog.h" /* FIXME: for insn_data */
36 #include "optabs.h"
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
39 #include "dumpfile.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
46 tree
47 stmt_vectype (struct _stmt_vec_info *stmt_info)
49 return STMT_VINFO_VECTYPE (stmt_info);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
54 bool
55 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
57 gimple stmt = STMT_VINFO_STMT (stmt_info);
58 basic_block bb = gimple_bb (stmt);
59 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60 struct loop* loop;
62 if (!loop_vinfo)
63 return false;
65 loop = LOOP_VINFO_LOOP (loop_vinfo);
67 return (bb->loop_father == loop->inner);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
74 unsigned
75 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
76 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77 int misalign, enum vect_cost_model_location where)
79 if (body_cost_vec)
81 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
82 add_stmt_info_to_vec (body_cost_vec, count, kind,
83 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
84 misalign);
85 return (unsigned)
86 (builtin_vectorization_cost (kind, vectype, misalign) * count);
89 else
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93 void *target_cost_data;
95 if (loop_vinfo)
96 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97 else
98 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
100 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
101 misalign, where);
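/* Illustrative use of record_stmt_cost (values are hypothetical): charging
   one unaligned vector store to the loop-body cost, matching the calls in
   vect_get_store_cost further below:
     inside_cost += record_stmt_cost (body_cost_vec, 1, unaligned_store,
                                      stmt_info, DR_MISALIGNMENT (dr),
                                      vect_body);  */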
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
184 enum vect_relevant relevant, bool live_p,
185 bool used_in_pattern)
187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190 gimple pattern_stmt;
192 if (dump_enabled_p ())
193 dump_printf_loc (MSG_NOTE, vect_location,
194 "mark relevant %d, live %d.", relevant, live_p);
196 /* If this stmt is an original stmt in a pattern, we might need to mark its
197 related pattern stmt instead of the original stmt. However, such stmts
198 may have their own uses that are not in any pattern; in such cases the
199 stmt itself should be marked. */
200 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
202 bool found = false;
203 if (!used_in_pattern)
205 imm_use_iterator imm_iter;
206 use_operand_p use_p;
207 gimple use_stmt;
208 tree lhs;
209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
210 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
212 if (is_gimple_assign (stmt))
213 lhs = gimple_assign_lhs (stmt);
214 else
215 lhs = gimple_call_lhs (stmt);
217 /* This use is outside the pattern; if LHS has other uses that are
218 pattern uses, we should mark the stmt itself, and not the pattern
219 stmt. */
220 if (TREE_CODE (lhs) == SSA_NAME)
221 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
223 if (is_gimple_debug (USE_STMT (use_p)))
224 continue;
225 use_stmt = USE_STMT (use_p);
227 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
228 continue;
230 if (vinfo_for_stmt (use_stmt)
231 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
233 found = true;
234 break;
239 if (!found)
241 /* This is the last stmt in a sequence that was detected as a
242 pattern that can potentially be vectorized. Don't mark the stmt
243 as relevant/live because it's not going to be vectorized.
244 Instead mark the pattern-stmt that replaces it. */
246 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
248 if (dump_enabled_p ())
249 dump_printf_loc (MSG_NOTE, vect_location,
250 "last stmt in pattern. don't mark"
251 " relevant/live.");
252 stmt_info = vinfo_for_stmt (pattern_stmt);
253 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
254 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
255 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
256 stmt = pattern_stmt;
260 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
261 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
262 STMT_VINFO_RELEVANT (stmt_info) = relevant;
264 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
265 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
267 if (dump_enabled_p ())
268 dump_printf_loc (MSG_NOTE, vect_location,
269 "already marked relevant/live.");
270 return;
273 worklist->safe_push (stmt);
277 /* Function vect_stmt_relevant_p.
279 Return true if STMT in loop that is represented by LOOP_VINFO is
280 "relevant for vectorization".
282 A stmt is considered "relevant for vectorization" if:
283 - it has uses outside the loop.
284 - it has vdefs (it alters memory).
285 - it is a control stmt in the loop (except for the exit condition).
287 CHECKME: what other side effects would the vectorizer allow? */
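/* Illustrative example (not from the sources): in
     for (i = 0; i < n; i++)
       { a[i] = b[i] + x;  sum += c[i]; }
   the store to a[i] is relevant because it has a vdef, and the stmt
   computing sum is live if sum is used after the loop.  */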
289 static bool
290 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
291 enum vect_relevant *relevant, bool *live_p)
293 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
294 ssa_op_iter op_iter;
295 imm_use_iterator imm_iter;
296 use_operand_p use_p;
297 def_operand_p def_p;
299 *relevant = vect_unused_in_scope;
300 *live_p = false;
302 /* cond stmt other than loop exit cond. */
303 if (is_ctrl_stmt (stmt)
304 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
305 != loop_exit_ctrl_vec_info_type)
306 *relevant = vect_used_in_scope;
308 /* changing memory. */
309 if (gimple_code (stmt) != GIMPLE_PHI)
310 if (gimple_vdef (stmt))
312 if (dump_enabled_p ())
313 dump_printf_loc (MSG_NOTE, vect_location,
314 "vec_stmt_relevant_p: stmt has vdefs.");
315 *relevant = vect_used_in_scope;
318 /* uses outside the loop. */
319 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
321 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
323 basic_block bb = gimple_bb (USE_STMT (use_p));
324 if (!flow_bb_inside_loop_p (loop, bb))
326 if (dump_enabled_p ())
327 dump_printf_loc (MSG_NOTE, vect_location,
328 "vec_stmt_relevant_p: used out of loop.");
330 if (is_gimple_debug (USE_STMT (use_p)))
331 continue;
333 /* We expect all such uses to be in the loop exit phis
334 (because of loop closed form) */
335 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
336 gcc_assert (bb == single_exit (loop)->dest);
338 *live_p = true;
343 return (*live_p || *relevant);
347 /* Function exist_non_indexing_operands_for_use_p
349 USE is one of the uses attached to STMT. Check if USE is
350 used in STMT for anything other than indexing an array. */
352 static bool
353 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
355 tree operand;
356 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
358 /* USE corresponds to some operand in STMT. If there is no data
359 reference in STMT, then any operand that corresponds to USE
360 is not indexing an array. */
361 if (!STMT_VINFO_DATA_REF (stmt_info))
362 return true;
364 /* STMT has a data_ref. FORNOW this means that it is of one of
365 the following forms:
366 -1- ARRAY_REF = var
367 -2- var = ARRAY_REF
368 (This should have been verified in analyze_data_refs).
370 'var' in the second case corresponds to a def, not a use,
371 so USE cannot correspond to any operands that are not used
372 for array indexing.
374 Therefore, all we need to check is if STMT falls into the
375 first case, and whether var corresponds to USE. */
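/* Illustrative example (hypothetical SSA names): for the store
   a[i_5] = x_7, USE == x_7 is a non-indexing operand (the copied value),
   whereas i_5 is only used to index the array.  */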
377 if (!gimple_assign_copy_p (stmt))
378 return false;
379 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
380 return false;
381 operand = gimple_assign_rhs1 (stmt);
382 if (TREE_CODE (operand) != SSA_NAME)
383 return false;
385 if (operand == use)
386 return true;
388 return false;
393 /* Function process_use.
395 Inputs:
396 - a USE in STMT in a loop represented by LOOP_VINFO
397 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
398 that defined USE. This is done by calling mark_relevant and passing it
399 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
400 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
401 be performed.
403 Outputs:
404 Generally, LIVE_P and RELEVANT are used to define the liveness and
405 relevance info of the DEF_STMT of this USE:
406 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
407 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
408 Exceptions:
409 - case 1: If USE is used only for address computations (e.g. array indexing),
410 which does not need to be directly vectorized, then the liveness/relevance
411 of the respective DEF_STMT is left unchanged.
412 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
413 skip DEF_STMT because it has already been processed.
414 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
415 be modified accordingly.
417 Return true if everything is as expected. Return false otherwise. */
419 static bool
420 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
421 enum vect_relevant relevant, vec<gimple> *worklist,
422 bool force)
424 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
425 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
426 stmt_vec_info dstmt_vinfo;
427 basic_block bb, def_bb;
428 tree def;
429 gimple def_stmt;
430 enum vect_def_type dt;
432 /* case 1: we are only interested in uses that need to be vectorized. Uses
433 that are used for address computation are not considered relevant. */
434 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
435 return true;
437 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
439 if (dump_enabled_p ())
440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
441 "not vectorized: unsupported use in stmt.");
442 return false;
445 if (!def_stmt || gimple_nop_p (def_stmt))
446 return true;
448 def_bb = gimple_bb (def_stmt);
449 if (!flow_bb_inside_loop_p (loop, def_bb))
451 if (dump_enabled_p ())
452 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
453 return true;
456 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
457 DEF_STMT must have already been processed, because this should be the
458 only way that STMT, which is a reduction-phi, was put in the worklist,
459 as there should be no other uses for DEF_STMT in the loop. So we just
460 check that everything is as expected, and we are done. */
461 dstmt_vinfo = vinfo_for_stmt (def_stmt);
462 bb = gimple_bb (stmt);
463 if (gimple_code (stmt) == GIMPLE_PHI
464 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
465 && gimple_code (def_stmt) != GIMPLE_PHI
466 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
467 && bb->loop_father == def_bb->loop_father)
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_NOTE, vect_location,
471 "reduc-stmt defining reduc-phi in the same nest.");
472 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
473 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
474 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
475 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
476 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
477 return true;
480 /* case 3a: outer-loop stmt defining an inner-loop stmt:
481 outer-loop-header-bb:
482 d = def_stmt
483 inner-loop:
484 stmt # use (d)
485 outer-loop-tail-bb:
486 ... */
487 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "outer-loop def-stmt defining inner-loop stmt.");
493 switch (relevant)
495 case vect_unused_in_scope:
496 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
497 vect_used_in_scope : vect_unused_in_scope;
498 break;
500 case vect_used_in_outer_by_reduction:
501 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
502 relevant = vect_used_by_reduction;
503 break;
505 case vect_used_in_outer:
506 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
507 relevant = vect_used_in_scope;
508 break;
510 case vect_used_in_scope:
511 break;
513 default:
514 gcc_unreachable ();
518 /* case 3b: inner-loop stmt defining an outer-loop stmt:
519 outer-loop-header-bb:
521 inner-loop:
522 d = def_stmt
523 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
524 stmt # use (d) */
525 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "inner-loop def-stmt defining outer-loop stmt.");
531 switch (relevant)
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
536 vect_used_in_outer_by_reduction : vect_unused_in_scope;
537 break;
539 case vect_used_by_reduction:
540 relevant = vect_used_in_outer_by_reduction;
541 break;
543 case vect_used_in_scope:
544 relevant = vect_used_in_outer;
545 break;
547 default:
548 gcc_unreachable ();
552 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
553 is_pattern_stmt_p (stmt_vinfo));
554 return true;
558 /* Function vect_mark_stmts_to_be_vectorized.
560 Not all stmts in the loop need to be vectorized. For example:
562 for i...
563 for j...
564 1. T0 = i + j
565 2. T1 = a[T0]
567 3. j = j + 1
569 Stmts 1 and 3 do not need to be vectorized, because loop control and
570 addressing of vectorized data-refs are handled differently.
572 This pass detects such stmts. */
574 bool
575 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
577 vec<gimple> worklist;
578 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
579 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
580 unsigned int nbbs = loop->num_nodes;
581 gimple_stmt_iterator si;
582 gimple stmt;
583 unsigned int i;
584 stmt_vec_info stmt_vinfo;
585 basic_block bb;
586 gimple phi;
587 bool live_p;
588 enum vect_relevant relevant, tmp_relevant;
589 enum vect_def_type def_type;
591 if (dump_enabled_p ())
592 dump_printf_loc (MSG_NOTE, vect_location,
593 "=== vect_mark_stmts_to_be_vectorized ===");
595 worklist.create (64);
597 /* 1. Init worklist. */
598 for (i = 0; i < nbbs; i++)
600 bb = bbs[i];
601 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
603 phi = gsi_stmt (si);
604 if (dump_enabled_p ())
606 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
607 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
610 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
611 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
613 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
615 stmt = gsi_stmt (si);
616 if (dump_enabled_p ())
618 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
619 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
622 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
623 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
627 /* 2. Process_worklist */
628 while (worklist.length () > 0)
630 use_operand_p use_p;
631 ssa_op_iter iter;
633 stmt = worklist.pop ();
634 if (dump_enabled_p ())
636 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
637 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
640 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
641 (DEF_STMT) as relevant/irrelevant and live/dead according to the
642 liveness and relevance properties of STMT. */
643 stmt_vinfo = vinfo_for_stmt (stmt);
644 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
645 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
647 /* Generally, the liveness and relevance properties of STMT are
648 propagated as is to the DEF_STMTs of its USEs:
649 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
650 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
652 One exception is when STMT has been identified as defining a reduction
653 variable; in this case we set the liveness/relevance as follows:
654 live_p = false
655 relevant = vect_used_by_reduction
656 This is because we distinguish between two kinds of relevant stmts -
657 those that are used by a reduction computation, and those that are
658 (also) used by a regular computation. This allows us later on to
659 identify stmts that are used solely by a reduction, and therefore the
660 order of the results that they produce does not have to be kept. */
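/* Illustrative example (not from the sources): for a summation
   sum_1 = sum_0 + a[i], the stmts feeding only the reduction are marked
   vect_used_by_reduction; since only the final sum matters, the order of
   the partial results they produce need not be preserved.  */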
662 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
663 tmp_relevant = relevant;
664 switch (def_type)
666 case vect_reduction_def:
667 switch (tmp_relevant)
669 case vect_unused_in_scope:
670 relevant = vect_used_by_reduction;
671 break;
673 case vect_used_by_reduction:
674 if (gimple_code (stmt) == GIMPLE_PHI)
675 break;
676 /* fall through */
678 default:
679 if (dump_enabled_p ())
680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
681 "unsupported use of reduction.");
682 worklist.release ();
683 return false;
686 live_p = false;
687 break;
689 case vect_nested_cycle:
690 if (tmp_relevant != vect_unused_in_scope
691 && tmp_relevant != vect_used_in_outer_by_reduction
692 && tmp_relevant != vect_used_in_outer)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of nested cycle.");
698 worklist.release ();
699 return false;
702 live_p = false;
703 break;
705 case vect_double_reduction_def:
706 if (tmp_relevant != vect_unused_in_scope
707 && tmp_relevant != vect_used_by_reduction)
709 if (dump_enabled_p ())
710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
711 "unsupported use of double reduction.");
713 worklist.release ();
714 return false;
717 live_p = false;
718 break;
720 default:
721 break;
724 if (is_pattern_stmt_p (stmt_vinfo))
726 /* Pattern statements are not inserted into the code, so
727 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
728 have to scan the RHS or function arguments instead. */
729 if (is_gimple_assign (stmt))
731 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
732 tree op = gimple_assign_rhs1 (stmt);
734 i = 1;
735 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
737 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
738 live_p, relevant, &worklist, false)
739 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
740 live_p, relevant, &worklist, false))
742 worklist.release ();
743 return false;
745 i = 2;
747 for (; i < gimple_num_ops (stmt); i++)
749 op = gimple_op (stmt, i);
750 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
751 &worklist, false))
753 worklist.release ();
754 return false;
758 else if (is_gimple_call (stmt))
760 for (i = 0; i < gimple_call_num_args (stmt); i++)
762 tree arg = gimple_call_arg (stmt, i);
763 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
764 &worklist, false))
766 worklist.release ();
767 return false;
772 else
773 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
775 tree op = USE_FROM_PTR (use_p);
776 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
777 &worklist, false))
779 worklist.release ();
780 return false;
784 if (STMT_VINFO_GATHER_P (stmt_vinfo))
786 tree off;
787 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
788 gcc_assert (decl);
789 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
790 &worklist, true))
792 worklist.release ();
793 return false;
796 } /* while worklist */
798 worklist.release ();
799 return true;
803 /* Function vect_model_simple_cost.
805 Models cost for simple operations, i.e. those that only emit ncopies of a
806 single op. Right now, this does not account for multiple insns that could
807 be generated for the single vector op. We will handle that shortly. */
809 void
810 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
811 enum vect_def_type *dt,
812 stmt_vector_for_cost *prologue_cost_vec,
813 stmt_vector_for_cost *body_cost_vec)
815 int i;
816 int inside_cost = 0, prologue_cost = 0;
818 /* The SLP costs were already calculated during SLP tree build. */
819 if (PURE_SLP_STMT (stmt_info))
820 return;
822 /* FORNOW: Assuming maximum 2 args per stmts. */
823 for (i = 0; i < 2; i++)
824 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
825 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
826 stmt_info, 0, vect_prologue);
828 /* Pass the inside-of-loop statements to the target-specific cost model. */
829 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
830 stmt_info, 0, vect_body);
832 if (dump_enabled_p ())
833 dump_printf_loc (MSG_NOTE, vect_location,
834 "vect_model_simple_cost: inside_cost = %d, "
835 "prologue_cost = %d .", inside_cost, prologue_cost);
839 /* Model cost for type demotion and promotion operations. PWR is normally
840 zero for single-step promotions and demotions. It will be one if
841 two-step promotion/demotion is required, and so on. Each additional
842 step doubles the number of instructions required. */
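/* Worked example (illustrative): for a two-step demotion, PWR == 1 and the
   loop below charges vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3
   vec_promote_demote stmts; the corresponding two-step promotion charges
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6.  */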
844 static void
845 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
846 enum vect_def_type *dt, int pwr)
848 int i, tmp;
849 int inside_cost = 0, prologue_cost = 0;
850 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
851 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
852 void *target_cost_data;
854 /* The SLP costs were already calculated during SLP tree build. */
855 if (PURE_SLP_STMT (stmt_info))
856 return;
858 if (loop_vinfo)
859 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
860 else
861 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
863 for (i = 0; i < pwr + 1; i++)
865 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
866 (i + 1) : i;
867 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
868 vec_promote_demote, stmt_info, 0,
869 vect_body);
872 /* FORNOW: Assuming maximum 2 args per stmts. */
873 for (i = 0; i < 2; i++)
874 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
875 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
876 stmt_info, 0, vect_prologue);
878 if (dump_enabled_p ())
879 dump_printf_loc (MSG_NOTE, vect_location,
880 "vect_model_promotion_demotion_cost: inside_cost = %d, "
881 "prologue_cost = %d .", inside_cost, prologue_cost);
884 /* Function vect_cost_group_size
886 For grouped load or store, return the group_size only if it is the first
887 load or store of a group, else return 1. This ensures that group size is
888 only returned once per group. */
890 static int
891 vect_cost_group_size (stmt_vec_info stmt_info)
893 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
895 if (first_stmt == STMT_VINFO_STMT (stmt_info))
896 return GROUP_SIZE (stmt_info);
898 return 1;
902 /* Function vect_model_store_cost
904 Models cost for stores. In the case of grouped accesses, one access
905 has the overhead of the grouped access attributed to it. */
907 void
908 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
909 bool store_lanes_p, enum vect_def_type dt,
910 slp_tree slp_node,
911 stmt_vector_for_cost *prologue_cost_vec,
912 stmt_vector_for_cost *body_cost_vec)
914 int group_size;
915 unsigned int inside_cost = 0, prologue_cost = 0;
916 struct data_reference *first_dr;
917 gimple first_stmt;
919 /* The SLP costs were already calculated during SLP tree build. */
920 if (PURE_SLP_STMT (stmt_info))
921 return;
923 if (dt == vect_constant_def || dt == vect_external_def)
924 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
925 stmt_info, 0, vect_prologue);
927 /* Grouped access? */
928 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
930 if (slp_node)
932 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
933 group_size = 1;
935 else
937 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
938 group_size = vect_cost_group_size (stmt_info);
941 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
943 /* Not a grouped access. */
944 else
946 group_size = 1;
947 first_dr = STMT_VINFO_DATA_REF (stmt_info);
950 /* We assume that the cost of a single store-lanes instruction is
951 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
952 access is instead being provided by a permute-and-store operation,
953 include the cost of the permutes. */
954 if (!store_lanes_p && group_size > 1)
956 /* Uses a high and low interleave operation for each needed permute. */
958 int nstmts = ncopies * exact_log2 (group_size) * group_size;
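/* Worked example (illustrative): group_size == 4 and ncopies == 1 gives
   nstmts = 1 * exact_log2 (4) * 4 = 8 vec_perm stmts.  */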
959 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
960 stmt_info, 0, vect_body);
962 if (dump_enabled_p ())
963 dump_printf_loc (MSG_NOTE, vect_location,
964 "vect_model_store_cost: strided group_size = %d .",
965 group_size);
968 /* Costs of the stores. */
969 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE, vect_location,
973 "vect_model_store_cost: inside_cost = %d, "
974 "prologue_cost = %d .", inside_cost, prologue_cost);
978 /* Calculate cost of DR's memory access. */
979 void
980 vect_get_store_cost (struct data_reference *dr, int ncopies,
981 unsigned int *inside_cost,
982 stmt_vector_for_cost *body_cost_vec)
984 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
985 gimple stmt = DR_STMT (dr);
986 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
988 switch (alignment_support_scheme)
990 case dr_aligned:
992 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
993 vector_store, stmt_info, 0,
994 vect_body);
996 if (dump_enabled_p ())
997 dump_printf_loc (MSG_NOTE, vect_location,
998 "vect_model_store_cost: aligned.");
999 break;
1002 case dr_unaligned_supported:
1004 /* Here, we assign an additional cost for the unaligned store. */
1005 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1006 unaligned_store, stmt_info,
1007 DR_MISALIGNMENT (dr), vect_body);
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_NOTE, vect_location,
1010 "vect_model_store_cost: unaligned supported by "
1011 "hardware.");
1012 break;
1015 case dr_unaligned_unsupported:
1017 *inside_cost = VECT_MAX_COST;
1019 if (dump_enabled_p ())
1020 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1021 "vect_model_store_cost: unsupported access.");
1022 break;
1025 default:
1026 gcc_unreachable ();
1031 /* Function vect_model_load_cost
1033 Models cost for loads. In the case of grouped accesses, the last access
1034 has the overhead of the grouped access attributed to it. Since unaligned
1035 accesses are supported for loads, we also account for the costs of the
1036 access scheme chosen. */
1038 void
1039 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1040 bool load_lanes_p, slp_tree slp_node,
1041 stmt_vector_for_cost *prologue_cost_vec,
1042 stmt_vector_for_cost *body_cost_vec)
1044 int group_size;
1045 gimple first_stmt;
1046 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1047 unsigned int inside_cost = 0, prologue_cost = 0;
1049 /* The SLP costs were already calculated during SLP tree build. */
1050 if (PURE_SLP_STMT (stmt_info))
1051 return;
1053 /* Grouped accesses? */
1054 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1055 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1057 group_size = vect_cost_group_size (stmt_info);
1058 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1060 /* Not a grouped access. */
1061 else
1063 group_size = 1;
1064 first_dr = dr;
1067 /* We assume that the cost of a single load-lanes instruction is
1068 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1069 access is instead being provided by a load-and-permute operation,
1070 include the cost of the permutes. */
1071 if (!load_lanes_p && group_size > 1)
1073 /* Uses an even and an odd extract operation for each needed permute. */
1074 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1075 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076 stmt_info, 0, vect_body);
1078 if (dump_enabled_p ())
1079 dump_printf_loc (MSG_NOTE, vect_location,
1080 "vect_model_load_cost: strided group_size = %d .",
1081 group_size);
1084 /* The loads themselves. */
1085 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1087 /* N scalar loads plus gathering them into a vector. */
1088 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1089 inside_cost += record_stmt_cost (body_cost_vec,
1090 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1091 scalar_load, stmt_info, 0, vect_body);
1092 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1093 stmt_info, 0, vect_body);
1095 else
1096 vect_get_load_cost (first_dr, ncopies,
1097 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1098 || group_size > 1 || slp_node),
1099 &inside_cost, &prologue_cost,
1100 prologue_cost_vec, body_cost_vec, true);
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_load_cost: inside_cost = %d, "
1105 "prologue_cost = %d .", inside_cost, prologue_cost);
1109 /* Calculate cost of DR's memory access. */
1110 void
1111 vect_get_load_cost (struct data_reference *dr, int ncopies,
1112 bool add_realign_cost, unsigned int *inside_cost,
1113 unsigned int *prologue_cost,
1114 stmt_vector_for_cost *prologue_cost_vec,
1115 stmt_vector_for_cost *body_cost_vec,
1116 bool record_prologue_costs)
1118 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1119 gimple stmt = DR_STMT (dr);
1120 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1122 switch (alignment_support_scheme)
1124 case dr_aligned:
1126 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1127 stmt_info, 0, vect_body);
1129 if (dump_enabled_p ())
1130 dump_printf_loc (MSG_NOTE, vect_location,
1131 "vect_model_load_cost: aligned.");
1133 break;
1135 case dr_unaligned_supported:
1137 /* Here, we assign an additional cost for the unaligned load. */
1138 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1139 unaligned_load, stmt_info,
1140 DR_MISALIGNMENT (dr), vect_body);
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE, vect_location,
1144 "vect_model_load_cost: unaligned supported by "
1145 "hardware.");
1147 break;
1149 case dr_explicit_realign:
1151 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1152 vector_load, stmt_info, 0, vect_body);
1153 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1154 vec_perm, stmt_info, 0, vect_body);
1156 /* FIXME: If the misalignment remains fixed across the iterations of
1157 the containing loop, the following cost should be added to the
1158 prologue costs. */
1159 if (targetm.vectorize.builtin_mask_for_load)
1160 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1161 stmt_info, 0, vect_body);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: explicit realign");
1167 break;
1169 case dr_explicit_realign_optimized:
1171 if (dump_enabled_p ())
1172 dump_printf_loc (MSG_NOTE, vect_location,
1173 "vect_model_load_cost: unaligned software "
1174 "pipelined.");
1176 /* Unaligned software pipeline has a load of an address, an initial
1177 load, and possibly a mask operation to "prime" the loop. However,
1178 if this is an access in a group of loads, which provide grouped
1179 access, then the above cost should only be considered for one
1180 access in the group. Inside the loop, there is a load op
1181 and a realignment op. */
1183 if (add_realign_cost && record_prologue_costs)
1185 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1186 vector_stmt, stmt_info,
1187 0, vect_prologue);
1188 if (targetm.vectorize.builtin_mask_for_load)
1189 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1190 vector_stmt, stmt_info,
1191 0, vect_prologue);
1194 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1195 stmt_info, 0, vect_body);
1196 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1197 stmt_info, 0, vect_body);
1199 if (dump_enabled_p ())
1200 dump_printf_loc (MSG_NOTE, vect_location,
1201 "vect_model_load_cost: explicit realign optimized");
1203 break;
1206 case dr_unaligned_unsupported:
1208 *inside_cost = VECT_MAX_COST;
1210 if (dump_enabled_p ())
1211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1212 "vect_model_load_cost: unsupported access.");
1213 break;
1216 default:
1217 gcc_unreachable ();
1221 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1222 the loop preheader for the vectorized stmt STMT. */
1224 static void
1225 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1227 if (gsi)
1228 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1229 else
1231 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1232 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1234 if (loop_vinfo)
1236 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1237 basic_block new_bb;
1238 edge pe;
1240 if (nested_in_vect_loop_p (loop, stmt))
1241 loop = loop->inner;
1243 pe = loop_preheader_edge (loop);
1244 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1245 gcc_assert (!new_bb);
1247 else
1249 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1250 basic_block bb;
1251 gimple_stmt_iterator gsi_bb_start;
1253 gcc_assert (bb_vinfo);
1254 bb = BB_VINFO_BB (bb_vinfo);
1255 gsi_bb_start = gsi_after_labels (bb);
1256 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1260 if (dump_enabled_p ())
1262 dump_printf_loc (MSG_NOTE, vect_location,
1263 "created new init_stmt: ");
1264 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1268 /* Function vect_init_vector.
1270 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1271 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1272 vector type, a vector with all elements equal to VAL is created first.
1273 Place the initialization at GSI if it is not NULL. Otherwise, place the
1274 initialization at the loop preheader.
1275 Return the DEF of INIT_STMT.
1276 It will be used in the vectorization of STMT. */
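/* Illustrative example (hypothetical operands): called with TYPE == V4SI
   and VAL == 5, a vector constant { 5, 5, 5, 5 } is built, assigned to a
   new "cst_" variable (inserted in the loop preheader when GSI is NULL),
   and the resulting SSA name is returned.  */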
1278 tree
1279 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1281 tree new_var;
1282 gimple init_stmt;
1283 tree vec_oprnd;
1284 tree new_temp;
1286 if (TREE_CODE (type) == VECTOR_TYPE
1287 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1289 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1291 if (CONSTANT_CLASS_P (val))
1292 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1293 else
1295 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1296 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1297 new_temp, val,
1298 NULL_TREE);
1299 vect_init_vector_1 (stmt, init_stmt, gsi);
1300 val = new_temp;
1303 val = build_vector_from_val (type, val);
1306 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1307 init_stmt = gimple_build_assign (new_var, val);
1308 new_temp = make_ssa_name (new_var, init_stmt);
1309 gimple_assign_set_lhs (init_stmt, new_temp);
1310 vect_init_vector_1 (stmt, init_stmt, gsi);
1311 vec_oprnd = gimple_assign_lhs (init_stmt);
1312 return vec_oprnd;
1316 /* Function vect_get_vec_def_for_operand.
1318 OP is an operand in STMT. This function returns a (vector) def that will be
1319 used in the vectorized stmt for STMT.
1321 When OP is an SSA_NAME that is defined in the loop,
1322 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1324 In case OP is an invariant or constant, a new stmt that creates a vector def
1325 needs to be introduced. */
1327 tree
1328 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1330 tree vec_oprnd;
1331 gimple vec_stmt;
1332 gimple def_stmt;
1333 stmt_vec_info def_stmt_info = NULL;
1334 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1335 unsigned int nunits;
1336 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1337 tree def;
1338 enum vect_def_type dt;
1339 bool is_simple_use;
1340 tree vector_type;
1342 if (dump_enabled_p ())
1344 dump_printf_loc (MSG_NOTE, vect_location,
1345 "vect_get_vec_def_for_operand: ");
1346 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1349 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1350 &def_stmt, &def, &dt);
1351 gcc_assert (is_simple_use);
1352 if (dump_enabled_p ())
1354 int loc_printed = 0;
1355 if (def)
1357 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1358 loc_printed = 1;
1359 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1361 if (def_stmt)
1363 if (loc_printed)
1364 dump_printf (MSG_NOTE, " def_stmt = ");
1365 else
1366 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1367 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1371 switch (dt)
1373 /* Case 1: operand is a constant. */
1374 case vect_constant_def:
1376 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1377 gcc_assert (vector_type);
1378 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1380 if (scalar_def)
1381 *scalar_def = op;
1383 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1384 if (dump_enabled_p ())
1385 dump_printf_loc (MSG_NOTE, vect_location,
1386 "Create vector_cst. nunits = %d", nunits);
1388 return vect_init_vector (stmt, op, vector_type, NULL);
1391 /* Case 2: operand is defined outside the loop - loop invariant. */
1392 case vect_external_def:
1394 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1395 gcc_assert (vector_type);
1397 if (scalar_def)
1398 *scalar_def = def;
1400 /* Create 'vec_inv = {inv,inv,..,inv}' */
1401 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");
1404 return vect_init_vector (stmt, def, vector_type, NULL);
1407 /* Case 3: operand is defined inside the loop. */
1408 case vect_internal_def:
1410 if (scalar_def)
1411 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1413 /* Get the def from the vectorized stmt. */
1414 def_stmt_info = vinfo_for_stmt (def_stmt);
1416 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1417 /* Get vectorized pattern statement. */
1418 if (!vec_stmt
1419 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1420 && !STMT_VINFO_RELEVANT (def_stmt_info))
1421 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1422 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1423 gcc_assert (vec_stmt);
1424 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1425 vec_oprnd = PHI_RESULT (vec_stmt);
1426 else if (is_gimple_call (vec_stmt))
1427 vec_oprnd = gimple_call_lhs (vec_stmt);
1428 else
1429 vec_oprnd = gimple_assign_lhs (vec_stmt);
1430 return vec_oprnd;
1433 /* Case 4: operand is defined by a loop header phi - reduction */
1434 case vect_reduction_def:
1435 case vect_double_reduction_def:
1436 case vect_nested_cycle:
1438 struct loop *loop;
1440 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1441 loop = (gimple_bb (def_stmt))->loop_father;
1443 /* Get the def before the loop */
1444 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1445 return get_initial_def_for_reduction (stmt, op, scalar_def);
1448 /* Case 5: operand is defined by loop-header phi - induction. */
1449 case vect_induction_def:
1451 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1453 /* Get the def from the vectorized stmt. */
1454 def_stmt_info = vinfo_for_stmt (def_stmt);
1455 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1456 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1457 vec_oprnd = PHI_RESULT (vec_stmt);
1458 else
1459 vec_oprnd = gimple_get_lhs (vec_stmt);
1460 return vec_oprnd;
1463 default:
1464 gcc_unreachable ();
1469 /* Function vect_get_vec_def_for_stmt_copy
1471 Return a vector-def for an operand. This function is used when the
1472 vectorized stmt to be created (by the caller to this function) is a "copy"
1473 created in case the vectorized result cannot fit in one vector, and several
1474 copies of the vector-stmt are required. In this case the vector-def is
1475 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1476 of the stmt that defines VEC_OPRND.
1477 DT is the type of the vector def VEC_OPRND.
1479 Context:
1480 In case the vectorization factor (VF) is bigger than the number
1481 of elements that can fit in a vectype (nunits), we have to generate
1482 more than one vector stmt to vectorize the scalar stmt. This situation
1483 arises when there are multiple data-types operated upon in the loop; the
1484 smallest data-type determines the VF, and as a result, when vectorizing
1485 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1486 vector stmt (each computing a vector of 'nunits' results, and together
1487 computing 'VF' results in each iteration). This function is called when
1488 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1489 which VF=16 and nunits=4, so the number of copies required is 4):
1491 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1493 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1494 VS1.1: vx.1 = memref1 VS1.2
1495 VS1.2: vx.2 = memref2 VS1.3
1496 VS1.3: vx.3 = memref3
1498 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1499 VSnew.1: vz1 = vx.1 + ... VSnew.2
1500 VSnew.2: vz2 = vx.2 + ... VSnew.3
1501 VSnew.3: vz3 = vx.3 + ...
1503 The vectorization of S1 is explained in vectorizable_load.
1504 The vectorization of S2:
1505 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1506 the function 'vect_get_vec_def_for_operand' is called to
1507 get the relevant vector-def for each operand of S2. For operand x it
1508 returns the vector-def 'vx.0'.
1510 To create the remaining copies of the vector-stmt (VSnew.j), this
1511 function is called to get the relevant vector-def for each operand. It is
1512 obtained from the respective VS1.j stmt, which is recorded in the
1513 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1515 For example, to obtain the vector-def 'vx.1' in order to create the
1516 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1517 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1518 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1519 and return its def ('vx.1').
1520 Overall, to create the above sequence this function will be called 3 times:
1521 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1522 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1523 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1525 tree
1526 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1528 gimple vec_stmt_for_operand;
1529 stmt_vec_info def_stmt_info;
1531 /* Do nothing; can reuse same def. */
1532 if (dt == vect_external_def || dt == vect_constant_def )
1533 return vec_oprnd;
1535 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1536 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1537 gcc_assert (def_stmt_info);
1538 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1539 gcc_assert (vec_stmt_for_operand);
1540 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1541 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1542 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1543 else
1544 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1545 return vec_oprnd;
1549 /* Get vectorized definitions for the operands to create a copy of an original
1550 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1552 static void
1553 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1554 vec<tree> *vec_oprnds0,
1555 vec<tree> *vec_oprnds1)
1557 tree vec_oprnd = vec_oprnds0->pop ();
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1560 vec_oprnds0->quick_push (vec_oprnd);
1562 if (vec_oprnds1 && vec_oprnds1->length ())
1564 vec_oprnd = vec_oprnds1->pop ();
1565 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1566 vec_oprnds1->quick_push (vec_oprnd);
1571 /* Get vectorized definitions for OP0 and OP1.
1572 REDUC_INDEX is the index of the reduction operand in case of reduction,
1573 and -1 otherwise. */
1575 void
1576 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1577 vec<tree> *vec_oprnds0,
1578 vec<tree> *vec_oprnds1,
1579 slp_tree slp_node, int reduc_index)
1581 if (slp_node)
1583 int nops = (op1 == NULL_TREE) ? 1 : 2;
1584 vec<tree> ops;
1585 ops.create (nops);
1586 vec<slp_void_p> vec_defs;
1587 vec_defs.create (nops);
1589 ops.quick_push (op0);
1590 if (op1)
1591 ops.quick_push (op1);
1593 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1595 *vec_oprnds0 = *((vec<tree> *) vec_defs[0]);
1596 if (op1)
1597 *vec_oprnds1 = *((vec<tree> *) vec_defs[1]);
1599 ops.release ();
1600 vec_defs.release ();
1602 else
1604 tree vec_oprnd;
1606 vec_oprnds0->create (1);
1607 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1608 vec_oprnds0->quick_push (vec_oprnd);
1610 if (op1)
1612 vec_oprnds1->create (1);
1613 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1614 vec_oprnds1->quick_push (vec_oprnd);
1620 /* Function vect_finish_stmt_generation.
1622 Insert a new stmt. */
1624 void
1625 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1626 gimple_stmt_iterator *gsi)
1628 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1629 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1630 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1632 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1634 if (!gsi_end_p (*gsi)
1635 && gimple_has_mem_ops (vec_stmt))
1637 gimple at_stmt = gsi_stmt (*gsi);
1638 tree vuse = gimple_vuse (at_stmt);
1639 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1641 tree vdef = gimple_vdef (at_stmt);
1642 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1643 /* If we have an SSA vuse and insert a store, update virtual
1644 SSA form to avoid triggering the renamer. Do so only
1645 if we can easily see all uses - which is what almost always
1646 happens with the way vectorized stmts are inserted. */
1647 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1648 && ((is_gimple_assign (vec_stmt)
1649 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1650 || (is_gimple_call (vec_stmt)
1651 && !(gimple_call_flags (vec_stmt)
1652 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1654 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1655 gimple_set_vdef (vec_stmt, new_vdef);
1656 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1660 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1662 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1663 bb_vinfo));
1665 if (dump_enabled_p ())
1667 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1671 gimple_set_location (vec_stmt, gimple_location (stmt));
1674 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1675 a function declaration if the target has a vectorized version
1676 of the function, or NULL_TREE if the function cannot be vectorized. */
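/* Illustrative example (target dependent): for a call to a const math
   builtin such as sqrt, targetm.vectorize.builtin_vectorized_function may
   return the declaration of a vector square-root variant operating on
   VECTYPE_IN, or NULL_TREE when no such version exists.  */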
1678 tree
1679 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1681 tree fndecl = gimple_call_fndecl (call);
1683 /* We only handle functions that do not read or clobber memory -- i.e.
1684 const or novops ones. */
1685 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1686 return NULL_TREE;
1688 if (!fndecl
1689 || TREE_CODE (fndecl) != FUNCTION_DECL
1690 || !DECL_BUILT_IN (fndecl))
1691 return NULL_TREE;
1693 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1694 vectype_in);
1697 /* Function vectorizable_call.
1699 Check if STMT performs a function call that can be vectorized.
1700 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1701 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1702 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1704 static bool
1705 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1706 slp_tree slp_node)
1708 tree vec_dest;
1709 tree scalar_dest;
1710 tree op, type;
1711 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1713 tree vectype_out, vectype_in;
1714 int nunits_in;
1715 int nunits_out;
1716 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1717 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1718 tree fndecl, new_temp, def, rhs_type;
1719 gimple def_stmt;
1720 enum vect_def_type dt[3]
1721 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1722 gimple new_stmt = NULL;
1723 int ncopies, j;
1724 vec<tree> vargs = vNULL;
1725 enum { NARROW, NONE, WIDEN } modifier;
1726 size_t i, nargs;
1727 tree lhs;
1729 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1730 return false;
1732 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1733 return false;
1735 /* Is STMT a vectorizable call? */
1736 if (!is_gimple_call (stmt))
1737 return false;
1739 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1740 return false;
1742 if (stmt_can_throw_internal (stmt))
1743 return false;
1745 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1747 /* Process function arguments. */
1748 rhs_type = NULL_TREE;
1749 vectype_in = NULL_TREE;
1750 nargs = gimple_call_num_args (stmt);
1752 /* Bail out if the function has more than three arguments; we do not have
1753 interesting builtin functions to vectorize with more than two arguments
1754 except for fma. A call with no arguments is not handled either. */
1755 if (nargs == 0 || nargs > 3)
1756 return false;
1758 for (i = 0; i < nargs; i++)
1760 tree opvectype;
1762 op = gimple_call_arg (stmt, i);
1764 /* We can only handle calls with arguments of the same type. */
1765 if (rhs_type
1766 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1768 if (dump_enabled_p ())
1769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1770 "argument types differ.");
1771 return false;
1773 if (!rhs_type)
1774 rhs_type = TREE_TYPE (op);
1776 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1777 &def_stmt, &def, &dt[i], &opvectype))
1779 if (dump_enabled_p ())
1780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1781 "use not simple.");
1782 return false;
1785 if (!vectype_in)
1786 vectype_in = opvectype;
1787 else if (opvectype
1788 && opvectype != vectype_in)
1790 if (dump_enabled_p ())
1791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1792 "argument vector types differ.");
1793 return false;
1796 /* If all arguments are external or constant defs use a vector type with
1797 the same size as the output vector type. */
1798 if (!vectype_in)
1799 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1800 if (vec_stmt)
1801 gcc_assert (vectype_in);
1802 if (!vectype_in)
1804 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "no vectype for scalar type ");
1808 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1811 return false;
1814 /* FORNOW */
1815 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1816 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1817 if (nunits_in == nunits_out / 2)
1818 modifier = NARROW;
1819 else if (nunits_out == nunits_in)
1820 modifier = NONE;
1821 else if (nunits_out == nunits_in / 2)
1822 modifier = WIDEN;
1823 else
1824 return false;
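/* Illustrative example (hypothetical types): nunits_in == 2 (e.g. V2DF)
   with nunits_out == 4 gives modifier == NARROW, equal element counts give
   NONE, and nunits_out == nunits_in / 2 gives WIDEN.  */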
1826 /* For now, we only vectorize functions if a target specific builtin
1827 is available. TODO -- in some cases, it might be profitable to
1828 insert the calls for pieces of the vector, in order to be able
1829 to vectorize other operations in the loop. */
1830 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1831 if (fndecl == NULL_TREE)
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835 "function is not vectorizable.");
1837 return false;
1840 gcc_assert (!gimple_vuse (stmt));
1842 if (slp_node || PURE_SLP_STMT (stmt_info))
1843 ncopies = 1;
1844 else if (modifier == NARROW)
1845 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1846 else
1847 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1849 /* Sanity check: make sure that at least one copy of the vectorized stmt
1850 needs to be generated. */
1851 gcc_assert (ncopies >= 1);
1853 if (!vec_stmt) /* transformation not required. */
1855 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
1858 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1859 return true;
1862 /** Transform. **/
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
1867 /* Handle def. */
1868 scalar_dest = gimple_call_lhs (stmt);
1869 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1871 prev_stmt_info = NULL;
1872 switch (modifier)
1874 case NONE:
1875 for (j = 0; j < ncopies; ++j)
1877 /* Build argument list for the vectorized call. */
1878 if (j == 0)
1879 vargs.create (nargs);
1880 else
1881 vargs.truncate (0);
1883 if (slp_node)
1885 vec<slp_void_p> vec_defs;
1886 vec_defs.create (nargs);
1887 vec<tree> vec_oprnds0;
1889 for (i = 0; i < nargs; i++)
1890 vargs.quick_push (gimple_call_arg (stmt, i));
1891 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1892 vec_oprnds0 = *((vec<tree> *) vec_defs[0]);
1894 /* Arguments are ready. Create the new vector stmt. */
1895 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1897 size_t k;
1898 for (k = 0; k < nargs; k++)
1900 vec<tree> vec_oprndsk = *((vec<tree> *) vec_defs[k]);
1901 vargs[k] = vec_oprndsk[i];
1903 new_stmt = gimple_build_call_vec (fndecl, vargs);
1904 new_temp = make_ssa_name (vec_dest, new_stmt);
1905 gimple_call_set_lhs (new_stmt, new_temp);
1906 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1907 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1910 for (i = 0; i < nargs; i++)
1912 vec<tree> vec_oprndsi = *((vec<tree> *) vec_defs[i]);
1913 vec_oprndsi.release ();
1915 vec_defs.release ();
1916 continue;
1919 for (i = 0; i < nargs; i++)
1921 op = gimple_call_arg (stmt, i);
1922 if (j == 0)
1923 vec_oprnd0
1924 = vect_get_vec_def_for_operand (op, stmt, NULL);
1925 else
1927 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1928 vec_oprnd0
1929 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1932 vargs.quick_push (vec_oprnd0);
1935 new_stmt = gimple_build_call_vec (fndecl, vargs);
1936 new_temp = make_ssa_name (vec_dest, new_stmt);
1937 gimple_call_set_lhs (new_stmt, new_temp);
1938 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1940 if (j == 0)
1941 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1942 else
1943 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1945 prev_stmt_info = vinfo_for_stmt (new_stmt);
1948 break;
1950 case NARROW:
1951 for (j = 0; j < ncopies; ++j)
1953 /* Build argument list for the vectorized call. */
1954 if (j == 0)
1955 vargs.create (nargs * 2);
1956 else
1957 vargs.truncate (0);
1959 if (slp_node)
1961 vec<slp_void_p> vec_defs;
1962 vec_defs.create (nargs);
1963 vec<tree> vec_oprnds0;
1965 for (i = 0; i < nargs; i++)
1966 vargs.quick_push (gimple_call_arg (stmt, i));
1967 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1968 vec_oprnds0 = *((vec<tree> *) vec_defs[0]);
1970 /* Arguments are ready. Create the new vector stmt. */
1971 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
1973 size_t k;
1974 vargs.truncate (0);
1975 for (k = 0; k < nargs; k++)
1977 vec<tree> vec_oprndsk = *((vec<tree> *) vec_defs[k]);
1978 vargs.quick_push (vec_oprndsk[i]);
1979 vargs.quick_push (vec_oprndsk[i + 1]);
1981 new_stmt = gimple_build_call_vec (fndecl, vargs);
1982 new_temp = make_ssa_name (vec_dest, new_stmt);
1983 gimple_call_set_lhs (new_stmt, new_temp);
1984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1985 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1988 for (i = 0; i < nargs; i++)
1990 vec<tree> vec_oprndsi = *((vec<tree> *) vec_defs[i]);
1991 vec_oprndsi.release ();
1993 vec_defs.release ();
1994 continue;
1997 for (i = 0; i < nargs; i++)
1999 op = gimple_call_arg (stmt, i);
2000 if (j == 0)
2002 vec_oprnd0
2003 = vect_get_vec_def_for_operand (op, stmt, NULL);
2004 vec_oprnd1
2005 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2007 else
2009 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2010 vec_oprnd0
2011 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2012 vec_oprnd1
2013 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2016 vargs.quick_push (vec_oprnd0);
2017 vargs.quick_push (vec_oprnd1);
2020 new_stmt = gimple_build_call_vec (fndecl, vargs);
2021 new_temp = make_ssa_name (vec_dest, new_stmt);
2022 gimple_call_set_lhs (new_stmt, new_temp);
2023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2025 if (j == 0)
2026 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2027 else
2028 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2030 prev_stmt_info = vinfo_for_stmt (new_stmt);
2033 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2035 break;
2037 case WIDEN:
2038 /* No current target implements this case. */
2039 return false;
2042 vargs.release ();
2044 /* Update the exception handling table with the vector stmt if necessary. */
2045 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2046 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2048 /* The call in STMT might prevent it from being removed in DCE.
2049 We cannot, however, remove it here, because of the way the SSA name
2050 it defines is mapped to the new definition. So just replace the
2051 rhs of the statement with something harmless. */
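/* Editorial note: concretely, the code below replaces the scalar call
   statement with 'lhs = 0' (a zero constant of the lhs type), so the SSA
   name stays valid and DCE can later remove the assignment if the lhs is
   unused.  */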
2053 if (slp_node)
2054 return true;
2056 type = TREE_TYPE (scalar_dest);
2057 if (is_pattern_stmt_p (stmt_info))
2058 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2059 else
2060 lhs = gimple_call_lhs (stmt);
2061 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2062 set_vinfo_for_stmt (new_stmt, stmt_info);
2063 set_vinfo_for_stmt (stmt, NULL);
2064 STMT_VINFO_STMT (stmt_info) = new_stmt;
2065 gsi_replace (gsi, new_stmt, false);
2066 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2068 return true;
2072 /* Function vect_gen_widened_results_half
2074 Create a vector stmt whose code is CODE and whose result variable is
2075 VEC_DEST; OP_TYPE gives the number of its arguments, which are
2076 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
2077 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2078 needs to be created (DECL is a function-decl of a target-builtin).
2079 STMT is the original scalar stmt that we are vectorizing. */
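/* Editorial example: for a widening multiply of V8HI operands into V4SI
   results, vect_create_vectorized_promotion_stmts typically calls this
   helper twice, once with the "lo" and once with the "hi" tree code
   (e.g. VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR), each call
   producing one of the two result vectors.  */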
2081 static gimple
2082 vect_gen_widened_results_half (enum tree_code code,
2083 tree decl,
2084 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2085 tree vec_dest, gimple_stmt_iterator *gsi,
2086 gimple stmt)
2088 gimple new_stmt;
2089 tree new_temp;
2091 /* Generate half of the widened result: */
2092 if (code == CALL_EXPR)
2094 /* Target specific support */
2095 if (op_type == binary_op)
2096 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2097 else
2098 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2099 new_temp = make_ssa_name (vec_dest, new_stmt);
2100 gimple_call_set_lhs (new_stmt, new_temp);
2102 else
2104 /* Generic support */
2105 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2106 if (op_type != binary_op)
2107 vec_oprnd1 = NULL;
2108 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2109 vec_oprnd1);
2110 new_temp = make_ssa_name (vec_dest, new_stmt);
2111 gimple_assign_set_lhs (new_stmt, new_temp);
2113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2115 return new_stmt;
2119 /* Get vectorized definitions for loop-based vectorization. For the first
2120 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2121 the scalar operand), and for the rest we get a copy with
2122 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2123 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2124 The vectors are collected into VEC_OPRNDS. */
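/* Editorial sketch: each invocation pushes two vector defs and then
   recurses MULTI_STEP_CVT more times, so a call with MULTI_STEP_CVT == 1
   collects four vector defs in VEC_OPRNDS.  */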
2126 static void
2127 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2128 vec<tree> *vec_oprnds, int multi_step_cvt)
2130 tree vec_oprnd;
2132 /* Get first vector operand. */
2133 /* All the vector operands except the very first one (that is the scalar oprnd)
2134 are stmt copies. */
2135 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2136 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2137 else
2138 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2140 vec_oprnds->quick_push (vec_oprnd);
2142 /* Get second vector operand. */
2143 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2144 vec_oprnds->quick_push (vec_oprnd);
2146 *oprnd = vec_oprnd;
2148 /* For conversion in multiple steps, continue to get operands
2149 recursively. */
2150 if (multi_step_cvt)
2151 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2155 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2156 For multi-step conversions store the resulting vectors and call the function
2157 recursively. */
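/* Editorial example (assuming 128-bit vectors): when demoting int to char
   with an intermediate short step, four V4SI operands are first packed
   pairwise into two V8HI vectors, and the recursive call packs those two
   into one V16QI result.  */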
2159 static void
2160 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2161 int multi_step_cvt, gimple stmt,
2162 vec<tree> vec_dsts,
2163 gimple_stmt_iterator *gsi,
2164 slp_tree slp_node, enum tree_code code,
2165 stmt_vec_info *prev_stmt_info)
2167 unsigned int i;
2168 tree vop0, vop1, new_tmp, vec_dest;
2169 gimple new_stmt;
2170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2172 vec_dest = vec_dsts.pop ();
2174 for (i = 0; i < vec_oprnds->length (); i += 2)
2176 /* Create demotion operation. */
2177 vop0 = (*vec_oprnds)[i];
2178 vop1 = (*vec_oprnds)[i + 1];
2179 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2180 new_tmp = make_ssa_name (vec_dest, new_stmt);
2181 gimple_assign_set_lhs (new_stmt, new_tmp);
2182 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2184 if (multi_step_cvt)
2185 /* Store the resulting vector for next recursive call. */
2186 (*vec_oprnds)[i/2] = new_tmp;
2187 else
2189 /* This is the last step of the conversion sequence. Store the
2190 vectors in SLP_NODE or in the vector info of the scalar statement
2191 (or in the STMT_VINFO_RELATED_STMT chain). */
2192 if (slp_node)
2193 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2194 else
2196 if (!*prev_stmt_info)
2197 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2198 else
2199 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2201 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2206 /* For multi-step demotion operations we first generate demotion operations
2207 from the source type to the intermediate types, and then combine the
2208 results (stored in VEC_OPRNDS) with a further demotion operation to the
2209 destination type. */
2210 if (multi_step_cvt)
2212 /* At each level of recursion we have half of the operands we had at the
2213 previous level. */
2214 vec_oprnds->truncate ((i+1)/2);
2215 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2216 stmt, vec_dsts, gsi, slp_node,
2217 VEC_PACK_TRUNC_EXPR,
2218 prev_stmt_info);
2221 vec_dsts.quick_push (vec_dest);
2225 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2226 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2227 the resulting vectors and call the function recursively. */
2229 static void
2230 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2231 vec<tree> *vec_oprnds1,
2232 gimple stmt, tree vec_dest,
2233 gimple_stmt_iterator *gsi,
2234 enum tree_code code1,
2235 enum tree_code code2, tree decl1,
2236 tree decl2, int op_type)
2238 int i;
2239 tree vop0, vop1, new_tmp1, new_tmp2;
2240 gimple new_stmt1, new_stmt2;
2241 vec<tree> vec_tmp = vNULL;
2243 vec_tmp.create (vec_oprnds0->length () * 2);
2244 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2246 if (op_type == binary_op)
2247 vop1 = (*vec_oprnds1)[i];
2248 else
2249 vop1 = NULL_TREE;
2251 /* Generate the two halves of promotion operation. */
2252 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2253 op_type, vec_dest, gsi, stmt);
2254 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2255 op_type, vec_dest, gsi, stmt);
2256 if (is_gimple_call (new_stmt1))
2258 new_tmp1 = gimple_call_lhs (new_stmt1);
2259 new_tmp2 = gimple_call_lhs (new_stmt2);
2261 else
2263 new_tmp1 = gimple_assign_lhs (new_stmt1);
2264 new_tmp2 = gimple_assign_lhs (new_stmt2);
2267 /* Store the results for the next step. */
2268 vec_tmp.quick_push (new_tmp1);
2269 vec_tmp.quick_push (new_tmp2);
2272 vec_oprnds0->truncate (0);
2273 *vec_oprnds0 = vec_tmp;
2277 /* Check if STMT performs a conversion operation that can be vectorized.
2278 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2279 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2280 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
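/* Editorial note: the conversions handled here include same-width cases
   such as int <-> float (modifier NONE), widenings such as short -> int
   or float -> double (modifier WIDEN), and narrowings such as int -> short
   (modifier NARROW), possibly split into multiple intermediate steps.  */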
2282 static bool
2283 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2284 gimple *vec_stmt, slp_tree slp_node)
2286 tree vec_dest;
2287 tree scalar_dest;
2288 tree op0, op1 = NULL_TREE;
2289 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2291 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2292 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2293 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2294 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2295 tree new_temp;
2296 tree def;
2297 gimple def_stmt;
2298 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2299 gimple new_stmt = NULL;
2300 stmt_vec_info prev_stmt_info;
2301 int nunits_in;
2302 int nunits_out;
2303 tree vectype_out, vectype_in;
2304 int ncopies, i, j;
2305 tree lhs_type, rhs_type;
2306 enum { NARROW, NONE, WIDEN } modifier;
2307 vec<tree> vec_oprnds0 = vNULL;
2308 vec<tree> vec_oprnds1 = vNULL;
2309 tree vop0;
2310 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2311 int multi_step_cvt = 0;
2312 vec<tree> vec_dsts = vNULL;
2313 vec<tree> interm_types = vNULL;
2314 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2315 int op_type;
2316 enum machine_mode rhs_mode;
2317 unsigned short fltsz;
2319 /* Is STMT a vectorizable conversion? */
2321 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2322 return false;
2324 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2325 return false;
2327 if (!is_gimple_assign (stmt))
2328 return false;
2330 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2331 return false;
2333 code = gimple_assign_rhs_code (stmt);
2334 if (!CONVERT_EXPR_CODE_P (code)
2335 && code != FIX_TRUNC_EXPR
2336 && code != FLOAT_EXPR
2337 && code != WIDEN_MULT_EXPR
2338 && code != WIDEN_LSHIFT_EXPR)
2339 return false;
2341 op_type = TREE_CODE_LENGTH (code);
2343 /* Check types of lhs and rhs. */
2344 scalar_dest = gimple_assign_lhs (stmt);
2345 lhs_type = TREE_TYPE (scalar_dest);
2346 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2348 op0 = gimple_assign_rhs1 (stmt);
2349 rhs_type = TREE_TYPE (op0);
2351 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2352 && !((INTEGRAL_TYPE_P (lhs_type)
2353 && INTEGRAL_TYPE_P (rhs_type))
2354 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2355 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2356 return false;
2358 if ((INTEGRAL_TYPE_P (lhs_type)
2359 && (TYPE_PRECISION (lhs_type)
2360 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2361 || (INTEGRAL_TYPE_P (rhs_type)
2362 && (TYPE_PRECISION (rhs_type)
2363 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367 "type conversion to/from bit-precision unsupported.");
2368 return false;
2371 /* Check the operands of the operation. */
2372 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2373 &def_stmt, &def, &dt[0], &vectype_in))
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "use not simple.");
2378 return false;
2380 if (op_type == binary_op)
2382 bool ok;
2384 op1 = gimple_assign_rhs2 (stmt);
2385 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2386 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2387 OP1. */
2388 if (CONSTANT_CLASS_P (op0))
2389 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2390 &def_stmt, &def, &dt[1], &vectype_in);
2391 else
2392 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2393 &def, &dt[1]);
2395 if (!ok)
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399 "use not simple.");
2400 return false;
2404 /* If op0 is an external or constant def, use a vector type of
2405 the same size as the output vector type. */
2406 if (!vectype_in)
2407 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2408 if (vec_stmt)
2409 gcc_assert (vectype_in);
2410 if (!vectype_in)
2412 if (dump_enabled_p ())
2414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415 "no vectype for scalar type ");
2416 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2419 return false;
2422 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2423 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2424 if (nunits_in < nunits_out)
2425 modifier = NARROW;
2426 else if (nunits_out == nunits_in)
2427 modifier = NONE;
2428 else
2429 modifier = WIDEN;
2431 /* Multiple types in SLP are handled by creating the appropriate number of
2432 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2433 case of SLP. */
2434 if (slp_node || PURE_SLP_STMT (stmt_info))
2435 ncopies = 1;
2436 else if (modifier == NARROW)
2437 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2438 else
2439 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2441 /* Sanity check: make sure that at least one copy of the vectorized stmt
2442 needs to be generated. */
2443 gcc_assert (ncopies >= 1);
2445 /* Supportable by target? */
2446 switch (modifier)
2448 case NONE:
2449 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2450 return false;
2451 if (supportable_convert_operation (code, vectype_out, vectype_in,
2452 &decl1, &code1))
2453 break;
2454 /* FALLTHRU */
2455 unsupported:
2456 if (dump_enabled_p ())
2457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2458 "conversion not supported by target.");
2459 return false;
2461 case WIDEN:
2462 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2463 &code1, &code2, &multi_step_cvt,
2464 &interm_types))
2466 /* Binary widening operation can only be supported directly by the
2467 architecture. */
2468 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2469 break;
2472 if (code != FLOAT_EXPR
2473 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2474 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2475 goto unsupported;
2477 rhs_mode = TYPE_MODE (rhs_type);
2478 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2479 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2480 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2481 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2483 cvt_type
2484 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2485 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2486 if (cvt_type == NULL_TREE)
2487 goto unsupported;
2489 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2491 if (!supportable_convert_operation (code, vectype_out,
2492 cvt_type, &decl1, &codecvt1))
2493 goto unsupported;
2495 else if (!supportable_widening_operation (code, stmt, vectype_out,
2496 cvt_type, &codecvt1,
2497 &codecvt2, &multi_step_cvt,
2498 &interm_types))
2499 continue;
2500 else
2501 gcc_assert (multi_step_cvt == 0);
2503 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2504 vectype_in, &code1, &code2,
2505 &multi_step_cvt, &interm_types))
2506 break;
2509 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2510 goto unsupported;
2512 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2513 codecvt2 = ERROR_MARK;
2514 else
2516 multi_step_cvt++;
2517 interm_types.safe_push (cvt_type);
2518 cvt_type = NULL_TREE;
2520 break;
2522 case NARROW:
2523 gcc_assert (op_type == unary_op);
2524 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2525 &code1, &multi_step_cvt,
2526 &interm_types))
2527 break;
2529 if (code != FIX_TRUNC_EXPR
2530 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2531 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2532 goto unsupported;
2534 rhs_mode = TYPE_MODE (rhs_type);
2535 cvt_type
2536 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2537 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2538 if (cvt_type == NULL_TREE)
2539 goto unsupported;
2540 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2541 &decl1, &codecvt1))
2542 goto unsupported;
2543 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2544 &code1, &multi_step_cvt,
2545 &interm_types))
2546 break;
2547 goto unsupported;
2549 default:
2550 gcc_unreachable ();
2553 if (!vec_stmt) /* transformation not required. */
2555 if (dump_enabled_p ())
2556 dump_printf_loc (MSG_NOTE, vect_location,
2557 "=== vectorizable_conversion ===");
2558 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2560 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2561 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2563 else if (modifier == NARROW)
2565 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2566 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2568 else
2570 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2571 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2573 interm_types.release ();
2574 return true;
2577 /** Transform. **/
2578 if (dump_enabled_p ())
2579 dump_printf_loc (MSG_NOTE, vect_location,
2580 "transform conversion. ncopies = %d.", ncopies);
2582 if (op_type == binary_op)
2584 if (CONSTANT_CLASS_P (op0))
2585 op0 = fold_convert (TREE_TYPE (op1), op0);
2586 else if (CONSTANT_CLASS_P (op1))
2587 op1 = fold_convert (TREE_TYPE (op0), op1);
2590 /* In case of multi-step conversion, we first generate conversion operations
2591 to the intermediate types, and then from those types to the final one.
2592 We create vector destinations for the intermediate types (TYPES) received
2593 from supportable_*_operation, and store them in the correct order
2594 for future use in vect_create_vectorized_*_stmts (). */
2595 vec_dsts.create (multi_step_cvt + 1);
2596 vec_dest = vect_create_destination_var (scalar_dest,
2597 (cvt_type && modifier == WIDEN)
2598 ? cvt_type : vectype_out);
2599 vec_dsts.quick_push (vec_dest);
2601 if (multi_step_cvt)
2603 for (i = interm_types.length () - 1;
2604 interm_types.iterate (i, &intermediate_type); i--)
2606 vec_dest = vect_create_destination_var (scalar_dest,
2607 intermediate_type);
2608 vec_dsts.quick_push (vec_dest);
2612 if (cvt_type)
2613 vec_dest = vect_create_destination_var (scalar_dest,
2614 modifier == WIDEN
2615 ? vectype_out : cvt_type);
2617 if (!slp_node)
2619 if (modifier == NONE)
2620 vec_oprnds0.create (1);
2621 else if (modifier == WIDEN)
2623 vec_oprnds0.create (multi_step_cvt ? vect_pow2(multi_step_cvt) : 1);
2624 if (op_type == binary_op)
2625 vec_oprnds1.create (1);
2627 else
2628 vec_oprnds0.create (
2629 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2631 else if (code == WIDEN_LSHIFT_EXPR)
2632 vec_oprnds1.create (slp_node->vec_stmts_size);
2634 last_oprnd = op0;
2635 prev_stmt_info = NULL;
2636 switch (modifier)
2638 case NONE:
2639 for (j = 0; j < ncopies; j++)
2641 if (j == 0)
2642 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2643 -1);
2644 else
2645 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2647 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2649 /* Arguments are ready, create the new vector stmt. */
2650 if (code1 == CALL_EXPR)
2652 new_stmt = gimple_build_call (decl1, 1, vop0);
2653 new_temp = make_ssa_name (vec_dest, new_stmt);
2654 gimple_call_set_lhs (new_stmt, new_temp);
2656 else
2658 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2659 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2660 vop0, NULL);
2661 new_temp = make_ssa_name (vec_dest, new_stmt);
2662 gimple_assign_set_lhs (new_stmt, new_temp);
2665 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2666 if (slp_node)
2667 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2670 if (j == 0)
2671 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2672 else
2673 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2674 prev_stmt_info = vinfo_for_stmt (new_stmt);
2676 break;
2678 case WIDEN:
2679 /* In case the vectorization factor (VF) is bigger than the number
2680 of elements that we can fit in a vectype (nunits), we have to
2681 generate more than one vector stmt - i.e., we need to "unroll"
2682 the vector stmt by a factor VF/nunits. */
2683 for (j = 0; j < ncopies; j++)
2685 /* Handle uses. */
2686 if (j == 0)
2688 if (slp_node)
2690 if (code == WIDEN_LSHIFT_EXPR)
2692 unsigned int k;
2694 vec_oprnd1 = op1;
2695 /* Store vec_oprnd1 for every vector stmt to be created
2696 for SLP_NODE. We check during the analysis that all
2697 the shift arguments are the same. */
2698 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2699 vec_oprnds1.quick_push (vec_oprnd1);
2701 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2702 slp_node, -1);
2704 else
2705 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2706 &vec_oprnds1, slp_node, -1);
2708 else
2710 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2711 vec_oprnds0.quick_push (vec_oprnd0);
2712 if (op_type == binary_op)
2714 if (code == WIDEN_LSHIFT_EXPR)
2715 vec_oprnd1 = op1;
2716 else
2717 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2718 NULL);
2719 vec_oprnds1.quick_push (vec_oprnd1);
2723 else
2725 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2726 vec_oprnds0.truncate (0);
2727 vec_oprnds0.quick_push (vec_oprnd0);
2728 if (op_type == binary_op)
2730 if (code == WIDEN_LSHIFT_EXPR)
2731 vec_oprnd1 = op1;
2732 else
2733 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2734 vec_oprnd1);
2735 vec_oprnds1.truncate (0);
2736 vec_oprnds1.quick_push (vec_oprnd1);
2740 /* Arguments are ready. Create the new vector stmts. */
2741 for (i = multi_step_cvt; i >= 0; i--)
2743 tree this_dest = vec_dsts[i];
2744 enum tree_code c1 = code1, c2 = code2;
2745 if (i == 0 && codecvt2 != ERROR_MARK)
2747 c1 = codecvt1;
2748 c2 = codecvt2;
2750 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2751 &vec_oprnds1,
2752 stmt, this_dest, gsi,
2753 c1, c2, decl1, decl2,
2754 op_type);
2757 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2759 if (cvt_type)
2761 if (codecvt1 == CALL_EXPR)
2763 new_stmt = gimple_build_call (decl1, 1, vop0);
2764 new_temp = make_ssa_name (vec_dest, new_stmt);
2765 gimple_call_set_lhs (new_stmt, new_temp);
2767 else
2769 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2770 new_temp = make_ssa_name (vec_dest, NULL);
2771 new_stmt = gimple_build_assign_with_ops (codecvt1,
2772 new_temp,
2773 vop0, NULL);
2776 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2778 else
2779 new_stmt = SSA_NAME_DEF_STMT (vop0);
2781 if (slp_node)
2782 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2783 else
2785 if (!prev_stmt_info)
2786 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2787 else
2788 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2789 prev_stmt_info = vinfo_for_stmt (new_stmt);
2794 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2795 break;
2797 case NARROW:
2798 /* In case the vectorization factor (VF) is bigger than the number
2799 of elements that we can fit in a vectype (nunits), we have to
2800 generate more than one vector stmt - i.e., we need to "unroll"
2801 the vector stmt by a factor VF/nunits. */
2802 for (j = 0; j < ncopies; j++)
2804 /* Handle uses. */
2805 if (slp_node)
2806 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2807 slp_node, -1);
2808 else
2810 vec_oprnds0.truncate (0);
2811 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2812 vect_pow2 (multi_step_cvt) - 1);
2815 /* Arguments are ready. Create the new vector stmts. */
2816 if (cvt_type)
2817 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2819 if (codecvt1 == CALL_EXPR)
2821 new_stmt = gimple_build_call (decl1, 1, vop0);
2822 new_temp = make_ssa_name (vec_dest, new_stmt);
2823 gimple_call_set_lhs (new_stmt, new_temp);
2825 else
2827 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2828 new_temp = make_ssa_name (vec_dest, NULL);
2829 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2830 vop0, NULL);
2833 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2834 vec_oprnds0[i] = new_temp;
2837 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2838 stmt, vec_dsts, gsi,
2839 slp_node, code1,
2840 &prev_stmt_info);
2843 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2844 break;
2847 vec_oprnds0.release ();
2848 vec_oprnds1.release ();
2849 vec_dsts.release ();
2850 interm_types.release ();
2852 return true;
2856 /* Function vectorizable_assignment.
2858 Check if STMT performs an assignment (copy) that can be vectorized.
2859 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2860 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2861 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2863 static bool
2864 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2865 gimple *vec_stmt, slp_tree slp_node)
2867 tree vec_dest;
2868 tree scalar_dest;
2869 tree op;
2870 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2871 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2872 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2873 tree new_temp;
2874 tree def;
2875 gimple def_stmt;
2876 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2877 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2878 int ncopies;
2879 int i, j;
2880 vec<tree> vec_oprnds = vNULL;
2881 tree vop;
2882 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2883 gimple new_stmt = NULL;
2884 stmt_vec_info prev_stmt_info = NULL;
2885 enum tree_code code;
2886 tree vectype_in;
2888 /* Multiple types in SLP are handled by creating the appropriate number of
2889 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2890 case of SLP. */
2891 if (slp_node || PURE_SLP_STMT (stmt_info))
2892 ncopies = 1;
2893 else
2894 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2896 gcc_assert (ncopies >= 1);
2898 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2899 return false;
2901 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2902 return false;
2904 /* Is vectorizable assignment? */
2905 if (!is_gimple_assign (stmt))
2906 return false;
2908 scalar_dest = gimple_assign_lhs (stmt);
2909 if (TREE_CODE (scalar_dest) != SSA_NAME)
2910 return false;
2912 code = gimple_assign_rhs_code (stmt);
2913 if (gimple_assign_single_p (stmt)
2914 || code == PAREN_EXPR
2915 || CONVERT_EXPR_CODE_P (code))
2916 op = gimple_assign_rhs1 (stmt);
2917 else
2918 return false;
2920 if (code == VIEW_CONVERT_EXPR)
2921 op = TREE_OPERAND (op, 0);
2923 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2924 &def_stmt, &def, &dt[0], &vectype_in))
2926 if (dump_enabled_p ())
2927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2928 "use not simple.");
2929 return false;
2932 /* We can handle NOP_EXPR conversions that do not change the number
2933 of elements or the vector size. */
2934 if ((CONVERT_EXPR_CODE_P (code)
2935 || code == VIEW_CONVERT_EXPR)
2936 && (!vectype_in
2937 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2938 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2939 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2940 return false;
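/* Editorial example: a cast between 'int' and 'unsigned int' keeps both
   the element count and the vector size (V4SI in both cases), so it is
   accepted here and later emitted as a VIEW_CONVERT_EXPR of the vector
   operand.  */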
2942 /* We do not handle bit-precision changes. */
2943 if ((CONVERT_EXPR_CODE_P (code)
2944 || code == VIEW_CONVERT_EXPR)
2945 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2946 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2947 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2948 || ((TYPE_PRECISION (TREE_TYPE (op))
2949 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2950 /* But a conversion that does not change the bit-pattern is ok. */
2951 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2952 > TYPE_PRECISION (TREE_TYPE (op)))
2953 && TYPE_UNSIGNED (TREE_TYPE (op))))
2955 if (dump_enabled_p ())
2956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2957 "type conversion to/from bit-precision "
2958 "unsupported.");
2959 return false;
2962 if (!vec_stmt) /* transformation not required. */
2964 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2965 if (dump_enabled_p ())
2966 dump_printf_loc (MSG_NOTE, vect_location,
2967 "=== vectorizable_assignment ===");
2968 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2969 return true;
2972 /** Transform. **/
2973 if (dump_enabled_p ())
2974 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
2976 /* Handle def. */
2977 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2979 /* Handle use. */
2980 for (j = 0; j < ncopies; j++)
2982 /* Handle uses. */
2983 if (j == 0)
2984 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2985 else
2986 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2988 /* Arguments are ready. Create the new vector stmt. */
2989 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2991 if (CONVERT_EXPR_CODE_P (code)
2992 || code == VIEW_CONVERT_EXPR)
2993 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2994 new_stmt = gimple_build_assign (vec_dest, vop);
2995 new_temp = make_ssa_name (vec_dest, new_stmt);
2996 gimple_assign_set_lhs (new_stmt, new_temp);
2997 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2998 if (slp_node)
2999 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3002 if (slp_node)
3003 continue;
3005 if (j == 0)
3006 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3007 else
3008 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3010 prev_stmt_info = vinfo_for_stmt (new_stmt);
3013 vec_oprnds.release ();
3014 return true;
3018 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3019 either as shift by a scalar or by a vector. */
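/* Editorial note: the check below first tries the vector-shift-by-scalar
   optab and falls back to the vector-shift-by-vector optab, so callers
   only learn whether some form of the shift is supported, not which.  */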
3021 bool
3022 vect_supportable_shift (enum tree_code code, tree scalar_type)
3025 enum machine_mode vec_mode;
3026 optab optab;
3027 int icode;
3028 tree vectype;
3030 vectype = get_vectype_for_scalar_type (scalar_type);
3031 if (!vectype)
3032 return false;
3034 optab = optab_for_tree_code (code, vectype, optab_scalar);
3035 if (!optab
3036 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3038 optab = optab_for_tree_code (code, vectype, optab_vector);
3039 if (!optab
3040 || (optab_handler (optab, TYPE_MODE (vectype))
3041 == CODE_FOR_nothing))
3042 return false;
3045 vec_mode = TYPE_MODE (vectype);
3046 icode = (int) optab_handler (optab, vec_mode);
3047 if (icode == CODE_FOR_nothing)
3048 return false;
3050 return true;
3054 /* Function vectorizable_shift.
3056 Check if STMT performs a shift operation that can be vectorized.
3057 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3058 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3059 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3061 static bool
3062 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3063 gimple *vec_stmt, slp_tree slp_node)
3065 tree vec_dest;
3066 tree scalar_dest;
3067 tree op0, op1 = NULL;
3068 tree vec_oprnd1 = NULL_TREE;
3069 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3070 tree vectype;
3071 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3072 enum tree_code code;
3073 enum machine_mode vec_mode;
3074 tree new_temp;
3075 optab optab;
3076 int icode;
3077 enum machine_mode optab_op2_mode;
3078 tree def;
3079 gimple def_stmt;
3080 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3081 gimple new_stmt = NULL;
3082 stmt_vec_info prev_stmt_info;
3083 int nunits_in;
3084 int nunits_out;
3085 tree vectype_out;
3086 tree op1_vectype;
3087 int ncopies;
3088 int j, i;
3089 vec<tree> vec_oprnds0 = vNULL;
3090 vec<tree> vec_oprnds1 = vNULL;
3091 tree vop0, vop1;
3092 unsigned int k;
3093 bool scalar_shift_arg = true;
3094 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3095 int vf;
3097 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3098 return false;
3100 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3101 return false;
3103 /* Is STMT a vectorizable binary/unary operation? */
3104 if (!is_gimple_assign (stmt))
3105 return false;
3107 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3108 return false;
3110 code = gimple_assign_rhs_code (stmt);
3112 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3113 || code == RROTATE_EXPR))
3114 return false;
3116 scalar_dest = gimple_assign_lhs (stmt);
3117 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3118 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3119 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3121 if (dump_enabled_p ())
3122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3123 "bit-precision shifts not supported.");
3124 return false;
3127 op0 = gimple_assign_rhs1 (stmt);
3128 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3129 &def_stmt, &def, &dt[0], &vectype))
3131 if (dump_enabled_p ())
3132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3133 "use not simple.");
3134 return false;
3136 /* If op0 is an external or constant def, use a vector type with
3137 the same size as the output vector type. */
3138 if (!vectype)
3139 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3140 if (vec_stmt)
3141 gcc_assert (vectype);
3142 if (!vectype)
3144 if (dump_enabled_p ())
3145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3146 "no vectype for scalar type ");
3147 return false;
3150 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3151 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3152 if (nunits_out != nunits_in)
3153 return false;
3155 op1 = gimple_assign_rhs2 (stmt);
3156 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3157 &def, &dt[1], &op1_vectype))
3159 if (dump_enabled_p ())
3160 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3161 "use not simple.");
3162 return false;
3165 if (loop_vinfo)
3166 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3167 else
3168 vf = 1;
3170 /* Multiple types in SLP are handled by creating the appropriate number of
3171 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3172 case of SLP. */
3173 if (slp_node || PURE_SLP_STMT (stmt_info))
3174 ncopies = 1;
3175 else
3176 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3178 gcc_assert (ncopies >= 1);
3180 /* Determine whether the shift amount is a vector or a scalar. If the
3181 shift/rotate amount is a vector, use the vector/vector shift optabs. */
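/* Editorial example: in 'a[i] << 3', or 'a[i] << n' with loop-invariant n,
   the shift amount is scalar and the vector/scalar shift optab can be
   used; in 'a[i] << b[i]' the amount varies per element, so the
   vector/vector optab is required.  */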
3183 if (dt[1] == vect_internal_def && !slp_node)
3184 scalar_shift_arg = false;
3185 else if (dt[1] == vect_constant_def
3186 || dt[1] == vect_external_def
3187 || dt[1] == vect_internal_def)
3189 /* In SLP, we need to check whether the shift count is the same
3190 for all statements; in loops, if it is a constant or invariant,
3191 it is always a scalar shift. */
3192 if (slp_node)
3194 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3195 gimple slpstmt;
3197 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3198 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3199 scalar_shift_arg = false;
3202 else
3204 if (dump_enabled_p ())
3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3206 "operand mode requires invariant argument.");
3207 return false;
3210 /* Vector shifted by vector. */
3211 if (!scalar_shift_arg)
3213 optab = optab_for_tree_code (code, vectype, optab_vector);
3214 if (dump_enabled_p ())
3215 dump_printf_loc (MSG_NOTE, vect_location,
3216 "vector/vector shift/rotate found.");
3218 if (!op1_vectype)
3219 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3220 if (op1_vectype == NULL_TREE
3221 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3223 if (dump_enabled_p ())
3224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3225 "unusable type for last operand in"
3226 " vector/vector shift/rotate.");
3227 return false;
3230 /* See if the machine has a vector-shift-by-scalar insn, and if not,
3231 whether it has a vector-shift-by-vector insn. */
3232 else
3234 optab = optab_for_tree_code (code, vectype, optab_scalar);
3235 if (optab
3236 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3238 if (dump_enabled_p ())
3239 dump_printf_loc (MSG_NOTE, vect_location,
3240 "vector/scalar shift/rotate found.");
3242 else
3244 optab = optab_for_tree_code (code, vectype, optab_vector);
3245 if (optab
3246 && (optab_handler (optab, TYPE_MODE (vectype))
3247 != CODE_FOR_nothing))
3249 scalar_shift_arg = false;
3251 if (dump_enabled_p ())
3252 dump_printf_loc (MSG_NOTE, vect_location,
3253 "vector/vector shift/rotate found.");
3255 /* Unlike the other binary operators, shifts/rotates have
3256 an int rhs rather than one of the same type as the lhs,
3257 so make sure the scalar has the right type if we are
3258 dealing with vectors of long long/long/short/char. */
3259 if (dt[1] == vect_constant_def)
3260 op1 = fold_convert (TREE_TYPE (vectype), op1);
3261 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3262 TREE_TYPE (op1)))
3264 if (slp_node
3265 && TYPE_MODE (TREE_TYPE (vectype))
3266 != TYPE_MODE (TREE_TYPE (op1)))
3268 if (dump_enabled_p ())
3269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3270 "unusable type for last operand in"
3271 " vector/vector shift/rotate.");
3272 return false;
3274 if (vec_stmt && !slp_node)
3276 op1 = fold_convert (TREE_TYPE (vectype), op1);
3277 op1 = vect_init_vector (stmt, op1,
3278 TREE_TYPE (vectype), NULL);
3285 /* Supportable by target? */
3286 if (!optab)
3288 if (dump_enabled_p ())
3289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3290 "no optab.");
3291 return false;
3293 vec_mode = TYPE_MODE (vectype);
3294 icode = (int) optab_handler (optab, vec_mode);
3295 if (icode == CODE_FOR_nothing)
3297 if (dump_enabled_p ())
3298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3299 "op not supported by target.");
3300 /* Check only during analysis. */
3301 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3302 || (vf < vect_min_worthwhile_factor (code)
3303 && !vec_stmt))
3304 return false;
3305 if (dump_enabled_p ())
3306 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3309 /* Worthwhile without SIMD support? Check only during analysis. */
3310 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3311 && vf < vect_min_worthwhile_factor (code)
3312 && !vec_stmt)
3314 if (dump_enabled_p ())
3315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3316 "not worthwhile without SIMD support.");
3317 return false;
3320 if (!vec_stmt) /* transformation not required. */
3322 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3323 if (dump_enabled_p ())
3324 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
3325 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3326 return true;
3329 /** Transform. **/
3331 if (dump_enabled_p ())
3332 dump_printf_loc (MSG_NOTE, vect_location,
3333 "transform binary/unary operation.");
3335 /* Handle def. */
3336 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3338 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3339 created in the previous stages of the recursion, so no allocation is
3340 needed, except for the case of shift with scalar shift argument. In that
3341 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3342 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3343 In case of loop-based vectorization we allocate VECs of size 1. We
3344 allocate VEC_OPRNDS1 only in case of binary operation. */
3345 if (!slp_node)
3347 vec_oprnds0.create (1);
3348 vec_oprnds1.create (1);
3350 else if (scalar_shift_arg)
3351 vec_oprnds1.create (slp_node->vec_stmts_size);
3353 prev_stmt_info = NULL;
3354 for (j = 0; j < ncopies; j++)
3356 /* Handle uses. */
3357 if (j == 0)
3359 if (scalar_shift_arg)
3361 /* Vector shl and shr insn patterns can be defined with scalar
3362 operand 2 (shift operand). In this case, use constant or loop
3363 invariant op1 directly, without extending it to vector mode
3364 first. */
3365 optab_op2_mode = insn_data[icode].operand[2].mode;
3366 if (!VECTOR_MODE_P (optab_op2_mode))
3368 if (dump_enabled_p ())
3369 dump_printf_loc (MSG_NOTE, vect_location,
3370 "operand 1 using scalar mode.");
3371 vec_oprnd1 = op1;
3372 vec_oprnds1.quick_push (vec_oprnd1);
3373 if (slp_node)
3375 /* Store vec_oprnd1 for every vector stmt to be created
3376 for SLP_NODE. We check during the analysis that all
3377 the shift arguments are the same.
3378 TODO: Allow different constants for different vector
3379 stmts generated for an SLP instance. */
3380 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3381 vec_oprnds1.quick_push (vec_oprnd1);
3386 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3387 (a special case for certain kinds of vector shifts); otherwise,
3388 operand 1 should be of a vector type (the usual case). */
3389 if (vec_oprnd1)
3390 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3391 slp_node, -1);
3392 else
3393 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3394 slp_node, -1);
3396 else
3397 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3399 /* Arguments are ready. Create the new vector stmt. */
3400 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3402 vop1 = vec_oprnds1[i];
3403 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3404 new_temp = make_ssa_name (vec_dest, new_stmt);
3405 gimple_assign_set_lhs (new_stmt, new_temp);
3406 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3407 if (slp_node)
3408 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3411 if (slp_node)
3412 continue;
3414 if (j == 0)
3415 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3416 else
3417 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3418 prev_stmt_info = vinfo_for_stmt (new_stmt);
3421 vec_oprnds0.release ();
3422 vec_oprnds1.release ();
3424 return true;
3428 static tree permute_vec_elements (tree, tree, tree, gimple,
3429 gimple_stmt_iterator *);
3432 /* Function vectorizable_operation.
3434 Check if STMT performs a binary, unary or ternary operation that can
3435 be vectorized.
3436 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3437 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3438 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3440 static bool
3441 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3442 gimple *vec_stmt, slp_tree slp_node)
3444 tree vec_dest;
3445 tree scalar_dest;
3446 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3447 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3448 tree vectype;
3449 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3450 enum tree_code code;
3451 enum machine_mode vec_mode;
3452 tree new_temp;
3453 int op_type;
3454 optab optab;
3455 int icode;
3456 tree def;
3457 gimple def_stmt;
3458 enum vect_def_type dt[3]
3459 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3460 gimple new_stmt = NULL;
3461 stmt_vec_info prev_stmt_info;
3462 int nunits_in;
3463 int nunits_out;
3464 tree vectype_out;
3465 int ncopies;
3466 int j, i;
3467 vec<tree> vec_oprnds0 = vNULL;
3468 vec<tree> vec_oprnds1 = vNULL;
3469 vec<tree> vec_oprnds2 = vNULL;
3470 tree vop0, vop1, vop2;
3471 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3472 int vf;
3474 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3475 return false;
3477 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3478 return false;
3480 /* Is STMT a vectorizable binary/unary operation? */
3481 if (!is_gimple_assign (stmt))
3482 return false;
3484 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3485 return false;
3487 code = gimple_assign_rhs_code (stmt);
3489 /* For pointer addition, we should use the normal plus for
3490 the vector addition. */
3491 if (code == POINTER_PLUS_EXPR)
3492 code = PLUS_EXPR;
3494 /* Support only unary, binary or ternary operations. */
3495 op_type = TREE_CODE_LENGTH (code);
3496 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3498 if (dump_enabled_p ())
3499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3500 "num. args = %d (not unary/binary/ternary op).",
3501 op_type);
3502 return false;
3505 scalar_dest = gimple_assign_lhs (stmt);
3506 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3508 /* Most operations cannot handle bit-precision types without extra
3509 truncations. */
3510 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3511 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3512 /* Exception are bitwise binary operations. */
3513 && code != BIT_IOR_EXPR
3514 && code != BIT_XOR_EXPR
3515 && code != BIT_AND_EXPR)
3517 if (dump_enabled_p ())
3518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3519 "bit-precision arithmetic not supported.");
3520 return false;
3523 op0 = gimple_assign_rhs1 (stmt);
3524 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3525 &def_stmt, &def, &dt[0], &vectype))
3527 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3529 "use not simple.");
3530 return false;
3532 /* If op0 is an external or constant def, use a vector type with
3533 the same size as the output vector type. */
3534 if (!vectype)
3535 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3536 if (vec_stmt)
3537 gcc_assert (vectype);
3538 if (!vectype)
3540 if (dump_enabled_p ())
3542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3543 "no vectype for scalar type ");
3544 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3545 TREE_TYPE (op0));
3548 return false;
3551 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3552 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3553 if (nunits_out != nunits_in)
3554 return false;
3556 if (op_type == binary_op || op_type == ternary_op)
3558 op1 = gimple_assign_rhs2 (stmt);
3559 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3560 &def, &dt[1]))
3562 if (dump_enabled_p ())
3563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3564 "use not simple.");
3565 return false;
3568 if (op_type == ternary_op)
3570 op2 = gimple_assign_rhs3 (stmt);
3571 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3572 &def, &dt[2]))
3574 if (dump_enabled_p ())
3575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3576 "use not simple.");
3577 return false;
3581 if (loop_vinfo)
3582 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3583 else
3584 vf = 1;
3586 /* Multiple types in SLP are handled by creating the appropriate number of
3587 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3588 case of SLP. */
3589 if (slp_node || PURE_SLP_STMT (stmt_info))
3590 ncopies = 1;
3591 else
3592 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3594 gcc_assert (ncopies >= 1);
3596 /* Shifts are handled in vectorizable_shift (). */
3597 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3598 || code == RROTATE_EXPR)
3599 return false;
3601 /* Supportable by target? */
3603 vec_mode = TYPE_MODE (vectype);
3604 if (code == MULT_HIGHPART_EXPR)
3606 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3607 icode = LAST_INSN_CODE;
3608 else
3609 icode = CODE_FOR_nothing;
3611 else
3613 optab = optab_for_tree_code (code, vectype, optab_default);
3614 if (!optab)
3616 if (dump_enabled_p ())
3617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3618 "no optab.");
3619 return false;
3621 icode = (int) optab_handler (optab, vec_mode);
3624 if (icode == CODE_FOR_nothing)
3626 if (dump_enabled_p ())
3627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3628 "op not supported by target.");
3629 /* Check only during analysis. */
3630 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3631 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3632 return false;
3633 if (dump_enabled_p ())
3634 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3637 /* Worthwhile without SIMD support? Check only during analysis. */
3638 if (!VECTOR_MODE_P (vec_mode)
3639 && !vec_stmt
3640 && vf < vect_min_worthwhile_factor (code))
3642 if (dump_enabled_p ())
3643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3644 "not worthwhile without SIMD support.");
3645 return false;
3648 if (!vec_stmt) /* transformation not required. */
3650 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3651 if (dump_enabled_p ())
3652 dump_printf_loc (MSG_NOTE, vect_location,
3653 "=== vectorizable_operation ===");
3654 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3655 return true;
3658 /** Transform. **/
3660 if (dump_enabled_p ())
3661 dump_printf_loc (MSG_NOTE, vect_location,
3662 "transform binary/unary operation.");
3664 /* Handle def. */
3665 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3667 /* In case the vectorization factor (VF) is bigger than the number
3668 of elements that we can fit in a vectype (nunits), we have to generate
3669 more than one vector stmt - i.e., we need to "unroll" the
3670 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3671 from one copy of the vector stmt to the next, in the field
3672 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3673 stages to find the correct vector defs to be used when vectorizing
3674 stmts that use the defs of the current stmt. The example below
3675 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3676 we need to create 4 vectorized stmts):
3678 before vectorization:
3679 RELATED_STMT VEC_STMT
3680 S1: x = memref - -
3681 S2: z = x + 1 - -
3683 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3684 there):
3685 RELATED_STMT VEC_STMT
3686 VS1_0: vx0 = memref0 VS1_1 -
3687 VS1_1: vx1 = memref1 VS1_2 -
3688 VS1_2: vx2 = memref2 VS1_3 -
3689 VS1_3: vx3 = memref3 - -
3690 S1: x = load - VS1_0
3691 S2: z = x + 1 - -
3693 step2: vectorize stmt S2 (done here):
3694 To vectorize stmt S2 we first need to find the relevant vector
3695 def for the first operand 'x'. This is, as usual, obtained from
3696 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3697 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3698 relevant vector def 'vx0'. Having found 'vx0' we can generate
3699 the vector stmt VS2_0, and as usual, record it in the
3700 STMT_VINFO_VEC_STMT of stmt S2.
3701 When creating the second copy (VS2_1), we obtain the relevant vector
3702 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3703 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3704 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3705 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3706 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3707 chain of stmts and pointers:
3708 RELATED_STMT VEC_STMT
3709 VS1_0: vx0 = memref0 VS1_1 -
3710 VS1_1: vx1 = memref1 VS1_2 -
3711 VS1_2: vx2 = memref2 VS1_3 -
3712 VS1_3: vx3 = memref3 - -
3713 S1: x = load - VS1_0
3714 VS2_0: vz0 = vx0 + v1 VS2_1 -
3715 VS2_1: vz1 = vx1 + v1 VS2_2 -
3716 VS2_2: vz2 = vx2 + v1 VS2_3 -
3717 VS2_3: vz3 = vx3 + v1 - -
3718 S2: z = x + 1 - VS2_0 */
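/* Editorial sketch of how a later consumer walks this chain (simplified,
   not part of the compiled code):

     gimple vs = STMT_VINFO_VEC_STMT (vinfo_for_stmt (S1));     // VS1_0
     for (int k = 1; k < ncopies; k++)
       vs = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (vs));      // VS1_1, ...

   which is essentially the lookup vect_get_vec_def_for_stmt_copy relies
   on when fetching the defs for the j-th copy.  */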
3720 prev_stmt_info = NULL;
3721 for (j = 0; j < ncopies; j++)
3723 /* Handle uses. */
3724 if (j == 0)
3726 if (op_type == binary_op || op_type == ternary_op)
3727 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3728 slp_node, -1);
3729 else
3730 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3731 slp_node, -1);
3732 if (op_type == ternary_op)
3734 vec_oprnds2.create (1);
3735 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3736 stmt,
3737 NULL));
3740 else
3742 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3743 if (op_type == ternary_op)
3745 tree vec_oprnd = vec_oprnds2.pop ();
3746 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3747 vec_oprnd));
3751 /* Arguments are ready. Create the new vector stmt. */
3752 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3754 vop1 = ((op_type == binary_op || op_type == ternary_op)
3755 ? vec_oprnds1[i] : NULL_TREE);
3756 vop2 = ((op_type == ternary_op)
3757 ? vec_oprnds2[i] : NULL_TREE);
3758 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3759 vop0, vop1, vop2);
3760 new_temp = make_ssa_name (vec_dest, new_stmt);
3761 gimple_assign_set_lhs (new_stmt, new_temp);
3762 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3763 if (slp_node)
3764 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3767 if (slp_node)
3768 continue;
3770 if (j == 0)
3771 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3772 else
3773 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3774 prev_stmt_info = vinfo_for_stmt (new_stmt);
3777 vec_oprnds0.release ();
3778 vec_oprnds1.release ();
3779 vec_oprnds2.release ();
3781 return true;
3785 /* Function vectorizable_store.
3787 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3788 can be vectorized.
3789 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3790 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3791 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3793 static bool
3794 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3795 slp_tree slp_node)
3797 tree scalar_dest;
3798 tree data_ref;
3799 tree op;
3800 tree vec_oprnd = NULL_TREE;
3801 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3802 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3803 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3804 tree elem_type;
3805 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3806 struct loop *loop = NULL;
3807 enum machine_mode vec_mode;
3808 tree dummy;
3809 enum dr_alignment_support alignment_support_scheme;
3810 tree def;
3811 gimple def_stmt;
3812 enum vect_def_type dt;
3813 stmt_vec_info prev_stmt_info = NULL;
3814 tree dataref_ptr = NULL_TREE;
3815 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3816 int ncopies;
3817 int j;
3818 gimple next_stmt, first_stmt = NULL;
3819 bool grouped_store = false;
3820 bool store_lanes_p = false;
3821 unsigned int group_size, i;
3822 vec<tree> dr_chain = vNULL;
3823 vec<tree> oprnds = vNULL;
3824 vec<tree> result_chain = vNULL;
3825 bool inv_p;
3826 vec<tree> vec_oprnds = vNULL;
3827 bool slp = (slp_node != NULL);
3828 unsigned int vec_num;
3829 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3830 tree aggr_type;
3832 if (loop_vinfo)
3833 loop = LOOP_VINFO_LOOP (loop_vinfo);
3835 /* Multiple types in SLP are handled by creating the appropriate number of
3836 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3837 case of SLP. */
3838 if (slp || PURE_SLP_STMT (stmt_info))
3839 ncopies = 1;
3840 else
3841 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3843 gcc_assert (ncopies >= 1);
3845 /* FORNOW. This restriction should be relaxed. */
3846 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3848 if (dump_enabled_p ())
3849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3850 "multiple types in nested loop.");
3851 return false;
3854 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3855 return false;
3857 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3858 return false;
3860 /* Is vectorizable store? */
3862 if (!is_gimple_assign (stmt))
3863 return false;
3865 scalar_dest = gimple_assign_lhs (stmt);
3866 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3867 && is_pattern_stmt_p (stmt_info))
3868 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3869 if (TREE_CODE (scalar_dest) != ARRAY_REF
3870 && TREE_CODE (scalar_dest) != INDIRECT_REF
3871 && TREE_CODE (scalar_dest) != COMPONENT_REF
3872 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3873 && TREE_CODE (scalar_dest) != REALPART_EXPR
3874 && TREE_CODE (scalar_dest) != MEM_REF)
3875 return false;
3877 gcc_assert (gimple_assign_single_p (stmt));
3878 op = gimple_assign_rhs1 (stmt);
3879 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3880 &def, &dt))
3882 if (dump_enabled_p ())
3883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3884 "use not simple.");
3885 return false;
3888 elem_type = TREE_TYPE (vectype);
3889 vec_mode = TYPE_MODE (vectype);
 3891   /* FORNOW. In some cases we can vectorize even if the data type is not
 3892      supported (e.g. array initialization with 0).  */
3893 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3894 return false;
3896 if (!STMT_VINFO_DATA_REF (stmt_info))
3897 return false;
3899 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3900 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3901 size_zero_node) < 0)
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3905 "negative step for store.");
3906 return false;
3909 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3911 grouped_store = true;
3912 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3913 if (!slp && !PURE_SLP_STMT (stmt_info))
3915 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3916 if (vect_store_lanes_supported (vectype, group_size))
3917 store_lanes_p = true;
3918 else if (!vect_grouped_store_supported (vectype, group_size))
3919 return false;
3922 if (first_stmt == stmt)
3924 /* STMT is the leader of the group. Check the operands of all the
3925 stmts of the group. */
3926 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3927 while (next_stmt)
3929 gcc_assert (gimple_assign_single_p (next_stmt));
3930 op = gimple_assign_rhs1 (next_stmt);
3931 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3932 &def_stmt, &def, &dt))
3934 if (dump_enabled_p ())
3935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3936 "use not simple.");
3937 return false;
3939 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3944 if (!vec_stmt) /* transformation not required. */
3946 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3947 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3948 NULL, NULL, NULL);
3949 return true;
3952 /** Transform. **/
3954 if (grouped_store)
3956 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3957 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3959 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3961 /* FORNOW */
3962 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3964 /* We vectorize all the stmts of the interleaving group when we
3965 reach the last stmt in the group. */
3966 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3967 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3968 && !slp)
3970 *vec_stmt = NULL;
3971 return true;
3974 if (slp)
3976 grouped_store = false;
3977 /* VEC_NUM is the number of vect stmts to be created for this
3978 group. */
3979 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3980 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3981 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3982 op = gimple_assign_rhs1 (first_stmt);
3984 else
3985 /* VEC_NUM is the number of vect stmts to be created for this
3986 group. */
3987 vec_num = group_size;
3989 else
3991 first_stmt = stmt;
3992 first_dr = dr;
3993 group_size = vec_num = 1;
3996 if (dump_enabled_p ())
3997 dump_printf_loc (MSG_NOTE, vect_location,
3998 "transform store. ncopies = %d", ncopies);
4000 dr_chain.create (group_size);
4001 oprnds.create (group_size);
4003 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4004 gcc_assert (alignment_support_scheme);
4005 /* Targets with store-lane instructions must not require explicit
4006 realignment. */
4007 gcc_assert (!store_lanes_p
4008 || alignment_support_scheme == dr_aligned
4009 || alignment_support_scheme == dr_unaligned_supported);
4011 if (store_lanes_p)
4012 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4013 else
4014 aggr_type = vectype;
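   /* When store-lane instructions are used, the vectors of the group are
      first combined into one array (an array of VEC_NUM * NUNITS scalar
      elements) that is written by a single STORE_LANES call, and the data
      pointer is bumped by the size of that whole array; otherwise each of
      the VEC_NUM stores below uses VECTYPE directly.  */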
 4016   /* In case the vectorization factor (VF) is bigger than the number
 4017      of elements that we can fit in a vectype (nunits), we have to generate
 4018      more than one vector stmt, i.e., we need to "unroll" the
 4019      vector stmt by a factor of VF/nunits. For more details see the
 4020      documentation in vect_get_vec_def_for_copy_stmt.  */
4022 /* In case of interleaving (non-unit grouped access):
4024 S1: &base + 2 = x2
4025 S2: &base = x0
4026 S3: &base + 1 = x1
4027 S4: &base + 3 = x3
 4029      We create vectorized stores starting from the base address (the access
 4030      of the first stmt in the chain, S2 in the above example) when the last
 4031      store stmt of the chain (S4) is reached:
4033 VS1: &base = vx2
4034 VS2: &base + vec_size*1 = vx0
4035 VS3: &base + vec_size*2 = vx1
4036 VS4: &base + vec_size*3 = vx3
4038 Then permutation statements are generated:
4040 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4041 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4044 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4045 (the order of the data-refs in the output of vect_permute_store_chain
4046 corresponds to the order of scalar stmts in the interleaving chain - see
4047 the documentation of vect_permute_store_chain()).
 4049      In case of both multiple types and interleaving, the above vector stores and
4050 permutation stmts are created for every copy. The result vector stmts are
4051 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4052 STMT_VINFO_RELATED_STMT for the next copies.
4055 prev_stmt_info = NULL;
4056 for (j = 0; j < ncopies; j++)
4058 gimple new_stmt;
4059 gimple ptr_incr;
4061 if (j == 0)
4063 if (slp)
4065 /* Get vectorized arguments for SLP_NODE. */
4066 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4067 NULL, slp_node, -1);
4069 vec_oprnd = vec_oprnds[0];
4071 else
4073 /* For interleaved stores we collect vectorized defs for all the
4074 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4075 used as an input to vect_permute_store_chain(), and OPRNDS as
4076 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4078 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4079 OPRNDS are of size 1. */
4080 next_stmt = first_stmt;
4081 for (i = 0; i < group_size; i++)
4083 /* Since gaps are not supported for interleaved stores,
4084 GROUP_SIZE is the exact number of stmts in the chain.
4085 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4086 there is no interleaving, GROUP_SIZE is 1, and only one
4087 iteration of the loop will be executed. */
4088 gcc_assert (next_stmt
4089 && gimple_assign_single_p (next_stmt));
4090 op = gimple_assign_rhs1 (next_stmt);
4092 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4093 NULL);
4094 dr_chain.quick_push (vec_oprnd);
4095 oprnds.quick_push (vec_oprnd);
4096 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
 4100 	  /* We should have caught mismatched types earlier.  */
4101 gcc_assert (useless_type_conversion_p (vectype,
4102 TREE_TYPE (vec_oprnd)));
4103 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4104 NULL_TREE, &dummy, gsi,
4105 &ptr_incr, false, &inv_p);
4106 gcc_assert (bb_vinfo || !inv_p);
4108 else
4110 /* For interleaved stores we created vectorized defs for all the
4111 defs stored in OPRNDS in the previous iteration (previous copy).
4112 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4113 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4114 next copy.
4115 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4116 OPRNDS are of size 1. */
4117 for (i = 0; i < group_size; i++)
4119 op = oprnds[i];
4120 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4121 &def, &dt);
4122 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4123 dr_chain[i] = vec_oprnd;
4124 oprnds[i] = vec_oprnd;
4126 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4127 TYPE_SIZE_UNIT (aggr_type));
4130 if (store_lanes_p)
4132 tree vec_array;
4134 /* Combine all the vectors into an array. */
4135 vec_array = create_vector_array (vectype, vec_num);
4136 for (i = 0; i < vec_num; i++)
4138 vec_oprnd = dr_chain[i];
4139 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4142 /* Emit:
4143 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4144 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4145 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4146 gimple_call_set_lhs (new_stmt, data_ref);
4147 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4149 else
4151 new_stmt = NULL;
4152 if (grouped_store)
4154 result_chain.create (group_size);
4155 /* Permute. */
4156 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4157 &result_chain);
4160 next_stmt = first_stmt;
4161 for (i = 0; i < vec_num; i++)
4163 unsigned align, misalign;
4165 if (i > 0)
4166 /* Bump the vector pointer. */
4167 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4168 stmt, NULL_TREE);
4170 if (slp)
4171 vec_oprnd = vec_oprnds[i];
4172 else if (grouped_store)
4173 /* For grouped stores vectorized defs are interleaved in
4174 vect_permute_store_chain(). */
4175 vec_oprnd = result_chain[i];
4177 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4178 build_int_cst (reference_alias_ptr_type
4179 (DR_REF (first_dr)), 0));
4180 align = TYPE_ALIGN_UNIT (vectype);
4181 if (aligned_access_p (first_dr))
4182 misalign = 0;
4183 else if (DR_MISALIGNMENT (first_dr) == -1)
4185 TREE_TYPE (data_ref)
4186 = build_aligned_type (TREE_TYPE (data_ref),
4187 TYPE_ALIGN (elem_type));
4188 align = TYPE_ALIGN_UNIT (elem_type);
4189 misalign = 0;
4191 else
4193 TREE_TYPE (data_ref)
4194 = build_aligned_type (TREE_TYPE (data_ref),
4195 TYPE_ALIGN (elem_type));
4196 misalign = DR_MISALIGNMENT (first_dr);
4198 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4199 misalign);
4201 /* Arguments are ready. Create the new vector stmt. */
4202 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4205 if (slp)
4206 continue;
4208 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4209 if (!next_stmt)
4210 break;
4213 if (!slp)
4215 if (j == 0)
4216 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4217 else
4218 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4219 prev_stmt_info = vinfo_for_stmt (new_stmt);
4223 dr_chain.release ();
4224 oprnds.release ();
4225 result_chain.release ();
4226 vec_oprnds.release ();
4228 return true;
4231 /* Given a vector type VECTYPE and permutation SEL returns
4232 the VECTOR_CST mask that implements the permutation of the
4233 vector elements. If that is impossible to do, returns NULL. */
4235 tree
4236 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4238 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4239 int i, nunits;
4241 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4243 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4244 return NULL;
4246 mask_elt_type = lang_hooks.types.type_for_mode
4247 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4248 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4250 mask_elts = XALLOCAVEC (tree, nunits);
4251 for (i = nunits - 1; i >= 0; i--)
4252 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4253 mask_vec = build_vector (mask_type, mask_elts);
4255 return mask_vec;
4258 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4259 reversal of the vector elements. If that is impossible to do,
4260 returns NULL. */
4262 static tree
4263 perm_mask_for_reverse (tree vectype)
4265 int i, nunits;
4266 unsigned char *sel;
4268 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4269 sel = XALLOCAVEC (unsigned char, nunits);
4271 for (i = 0; i < nunits; ++i)
4272 sel[i] = nunits - 1 - i;
4274 return vect_gen_perm_mask (vectype, sel);
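   /* For instance, for a 4-element vector type the mask built above is
      {3, 2, 1, 0}, so VEC_PERM_EXPR <x, x, {3, 2, 1, 0}> yields the elements
      of X in reverse order; vectorizable_load below uses this for
      negative-step accesses.  */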
 4277 /* Given vector variables X and Y that were generated for the scalar
 4278    STMT, generate instructions to permute the vector elements of X and Y
4279 using permutation mask MASK_VEC, insert them at *GSI and return the
4280 permuted vector variable. */
4282 static tree
4283 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4284 gimple_stmt_iterator *gsi)
4286 tree vectype = TREE_TYPE (x);
4287 tree perm_dest, data_ref;
4288 gimple perm_stmt;
4290 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4291 data_ref = make_ssa_name (perm_dest, NULL);
4293 /* Generate the permute statement. */
4294 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4295 x, y, mask_vec);
4296 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4298 return data_ref;
4301 /* vectorizable_load.
4303 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4304 can be vectorized.
4305 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4306 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4307 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4309 static bool
4310 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4311 slp_tree slp_node, slp_instance slp_node_instance)
4313 tree scalar_dest;
4314 tree vec_dest = NULL;
4315 tree data_ref = NULL;
4316 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4317 stmt_vec_info prev_stmt_info;
4318 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4319 struct loop *loop = NULL;
4320 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4321 bool nested_in_vect_loop = false;
4322 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4323 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4324 tree elem_type;
4325 tree new_temp;
4326 enum machine_mode mode;
4327 gimple new_stmt = NULL;
4328 tree dummy;
4329 enum dr_alignment_support alignment_support_scheme;
4330 tree dataref_ptr = NULL_TREE;
4331 gimple ptr_incr;
4332 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4333 int ncopies;
4334 int i, j, group_size;
4335 tree msq = NULL_TREE, lsq;
4336 tree offset = NULL_TREE;
4337 tree realignment_token = NULL_TREE;
4338 gimple phi = NULL;
4339 vec<tree> dr_chain = vNULL;
4340 bool grouped_load = false;
4341 bool load_lanes_p = false;
4342 gimple first_stmt;
4343 bool inv_p;
4344 bool negative = false;
4345 bool compute_in_loop = false;
4346 struct loop *at_loop;
4347 int vec_num;
4348 bool slp = (slp_node != NULL);
4349 bool slp_perm = false;
4350 enum tree_code code;
4351 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4352 int vf;
4353 tree aggr_type;
4354 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4355 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4356 int gather_scale = 1;
4357 enum vect_def_type gather_dt = vect_unknown_def_type;
4359 if (loop_vinfo)
4361 loop = LOOP_VINFO_LOOP (loop_vinfo);
4362 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4363 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4365 else
4366 vf = 1;
4368 /* Multiple types in SLP are handled by creating the appropriate number of
4369 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4370 case of SLP. */
4371 if (slp || PURE_SLP_STMT (stmt_info))
4372 ncopies = 1;
4373 else
4374 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4376 gcc_assert (ncopies >= 1);
4378 /* FORNOW. This restriction should be relaxed. */
4379 if (nested_in_vect_loop && ncopies > 1)
4381 if (dump_enabled_p ())
4382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4383 "multiple types in nested loop.");
4384 return false;
4387 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4388 return false;
4390 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4391 return false;
4393 /* Is vectorizable load? */
4394 if (!is_gimple_assign (stmt))
4395 return false;
4397 scalar_dest = gimple_assign_lhs (stmt);
4398 if (TREE_CODE (scalar_dest) != SSA_NAME)
4399 return false;
4401 code = gimple_assign_rhs_code (stmt);
4402 if (code != ARRAY_REF
4403 && code != INDIRECT_REF
4404 && code != COMPONENT_REF
4405 && code != IMAGPART_EXPR
4406 && code != REALPART_EXPR
4407 && code != MEM_REF
4408 && TREE_CODE_CLASS (code) != tcc_declaration)
4409 return false;
4411 if (!STMT_VINFO_DATA_REF (stmt_info))
4412 return false;
4414 elem_type = TREE_TYPE (vectype);
4415 mode = TYPE_MODE (vectype);
 4417   /* FORNOW. In some cases we can vectorize even if the data type is not
 4418      supported (e.g. data copies).  */
4419 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4421 if (dump_enabled_p ())
4422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4423 "Aligned load, but unsupported type.");
4424 return false;
4427 /* Check if the load is a part of an interleaving chain. */
4428 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4430 grouped_load = true;
4431 /* FORNOW */
4432 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4434 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4435 if (!slp && !PURE_SLP_STMT (stmt_info))
4437 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4438 if (vect_load_lanes_supported (vectype, group_size))
4439 load_lanes_p = true;
4440 else if (!vect_grouped_load_supported (vectype, group_size))
4441 return false;
4446 if (STMT_VINFO_GATHER_P (stmt_info))
4448 gimple def_stmt;
4449 tree def;
4450 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4451 &gather_off, &gather_scale);
4452 gcc_assert (gather_decl);
4453 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4454 &def_stmt, &def, &gather_dt,
4455 &gather_off_vectype))
4457 if (dump_enabled_p ())
4458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4459 "gather index use not simple.");
4460 return false;
4463 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4465 else
4467 negative = tree_int_cst_compare (nested_in_vect_loop
4468 ? STMT_VINFO_DR_STEP (stmt_info)
4469 : DR_STEP (dr),
4470 size_zero_node) < 0;
4471 if (negative && ncopies > 1)
4473 if (dump_enabled_p ())
4474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4475 "multiple types with negative step.");
4476 return false;
4479 if (negative)
4481 gcc_assert (!grouped_load);
4482 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4483 if (alignment_support_scheme != dr_aligned
4484 && alignment_support_scheme != dr_unaligned_supported)
4486 if (dump_enabled_p ())
4487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4488 "negative step but alignment required.");
4489 return false;
4491 if (!perm_mask_for_reverse (vectype))
4493 if (dump_enabled_p ())
4494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4495 "negative step and reversing not supported.");
4496 return false;
4501 if (!vec_stmt) /* transformation not required. */
4503 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4504 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4505 return true;
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_NOTE, vect_location,
4510 "transform load. ncopies = %d", ncopies);
4512 /** Transform. **/
4514 if (STMT_VINFO_GATHER_P (stmt_info))
4516 tree vec_oprnd0 = NULL_TREE, op;
4517 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4518 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4519 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4520 edge pe = loop_preheader_edge (loop);
4521 gimple_seq seq;
4522 basic_block new_bb;
4523 enum { NARROW, NONE, WIDEN } modifier;
4524 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4526 if (nunits == gather_off_nunits)
4527 modifier = NONE;
4528 else if (nunits == gather_off_nunits / 2)
4530 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4531 modifier = WIDEN;
4533 for (i = 0; i < gather_off_nunits; ++i)
4534 sel[i] = i | nunits;
4536 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4537 gcc_assert (perm_mask != NULL_TREE);
4539 else if (nunits == gather_off_nunits * 2)
4541 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4542 modifier = NARROW;
4544 for (i = 0; i < nunits; ++i)
4545 sel[i] = i < gather_off_nunits
4546 ? i : i + nunits - gather_off_nunits;
4548 perm_mask = vect_gen_perm_mask (vectype, sel);
4549 gcc_assert (perm_mask != NULL_TREE);
4550 ncopies *= 2;
4552 else
4553 gcc_unreachable ();
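      /* Illustrative values: with nunits == 8 and gather_off_nunits == 4
	 (the NARROW case) the selector built above is
	 {0, 1, 2, 3, 8, 9, 10, 11}, i.e. it selects the low half of each of
	 two successive gather results and concatenates them below; with
	 nunits == 4 and gather_off_nunits == 8 (the WIDEN case) it is
	 {4, 5, 6, 7, 4, 5, 6, 7}, which moves the high half of the offset
	 vector into the low positions for every odd copy.  */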
4555 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4556 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4557 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4558 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4559 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4560 scaletype = TREE_VALUE (arglist);
4561 gcc_checking_assert (types_compatible_p (srctype, rettype)
4562 && types_compatible_p (srctype, masktype));
4564 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4566 ptr = fold_convert (ptrtype, gather_base);
4567 if (!is_gimple_min_invariant (ptr))
4569 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4570 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4571 gcc_assert (!new_bb);
4574 /* Currently we support only unconditional gather loads,
4575 so mask should be all ones. */
4576 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4577 mask = build_int_cst (TREE_TYPE (masktype), -1);
4578 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4580 REAL_VALUE_TYPE r;
4581 long tmp[6];
4582 for (j = 0; j < 6; ++j)
4583 tmp[j] = -1;
4584 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4585 mask = build_real (TREE_TYPE (masktype), r);
4587 else
4588 gcc_unreachable ();
4589 mask = build_vector_from_val (masktype, mask);
4590 mask = vect_init_vector (stmt, mask, masktype, NULL);
4592 scale = build_int_cst (scaletype, gather_scale);
4594 prev_stmt_info = NULL;
4595 for (j = 0; j < ncopies; ++j)
4597 if (modifier == WIDEN && (j & 1))
4598 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4599 perm_mask, stmt, gsi);
4600 else if (j == 0)
4601 op = vec_oprnd0
4602 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4603 else
4604 op = vec_oprnd0
4605 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4607 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4609 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4610 == TYPE_VECTOR_SUBPARTS (idxtype));
4611 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4612 var = make_ssa_name (var, NULL);
4613 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4614 new_stmt
4615 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4616 op, NULL_TREE);
4617 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4618 op = var;
4621 new_stmt
4622 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4624 if (!useless_type_conversion_p (vectype, rettype))
4626 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4627 == TYPE_VECTOR_SUBPARTS (rettype));
4628 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4629 op = make_ssa_name (var, new_stmt);
4630 gimple_call_set_lhs (new_stmt, op);
4631 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4632 var = make_ssa_name (vec_dest, NULL);
4633 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4634 new_stmt
4635 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4636 NULL_TREE);
4638 else
4640 var = make_ssa_name (vec_dest, new_stmt);
4641 gimple_call_set_lhs (new_stmt, var);
4644 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4646 if (modifier == NARROW)
4648 if ((j & 1) == 0)
4650 prev_res = var;
4651 continue;
4653 var = permute_vec_elements (prev_res, var,
4654 perm_mask, stmt, gsi);
4655 new_stmt = SSA_NAME_DEF_STMT (var);
4658 if (prev_stmt_info == NULL)
4659 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4660 else
4661 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4662 prev_stmt_info = vinfo_for_stmt (new_stmt);
4664 return true;
4666 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4668 gimple_stmt_iterator incr_gsi;
4669 bool insert_after;
4670 gimple incr;
4671 tree offvar;
4672 tree ivstep;
4673 tree running_off;
4674 vec<constructor_elt, va_gc> *v = NULL;
4675 gimple_seq stmts = NULL;
4676 tree stride_base, stride_step, alias_off;
4678 gcc_assert (!nested_in_vect_loop);
4680 stride_base
4681 = fold_build_pointer_plus
4682 (unshare_expr (DR_BASE_ADDRESS (dr)),
4683 size_binop (PLUS_EXPR,
4684 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
 4685 			 convert_to_ptrofftype (DR_INIT (dr))));
4686 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4688 /* For a load with loop-invariant (but other than power-of-2)
4689 stride (i.e. not a grouped access) like so:
4691 for (i = 0; i < n; i += stride)
4692 ... = array[i];
4694 we generate a new induction variable and new accesses to
4695 form a new vector (or vectors, depending on ncopies):
4697 for (j = 0; ; j += VF*stride)
4698 tmp1 = array[j];
4699 tmp2 = array[j + stride];
4701 vectemp = {tmp1, tmp2, ...}
4704 ivstep = stride_step;
4705 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4706 build_int_cst (TREE_TYPE (ivstep), vf));
4708 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4710 create_iv (stride_base, ivstep, NULL,
4711 loop, &incr_gsi, insert_after,
4712 &offvar, NULL);
4713 incr = gsi_stmt (incr_gsi);
4714 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4716 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4717 if (stmts)
4718 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4720 prev_stmt_info = NULL;
4721 running_off = offvar;
4722 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4723 for (j = 0; j < ncopies; j++)
4725 tree vec_inv;
4727 vec_alloc (v, nunits);
4728 for (i = 0; i < nunits; i++)
4730 tree newref, newoff;
4731 gimple incr;
4732 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4733 running_off, alias_off);
4735 newref = force_gimple_operand_gsi (gsi, newref, true,
4736 NULL_TREE, true,
4737 GSI_SAME_STMT);
4738 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4739 newoff = copy_ssa_name (running_off, NULL);
4740 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4741 running_off, stride_step);
4742 vect_finish_stmt_generation (stmt, incr, gsi);
4744 running_off = newoff;
4747 vec_inv = build_constructor (vectype, v);
4748 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4749 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4751 if (j == 0)
4752 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4753 else
4754 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4755 prev_stmt_info = vinfo_for_stmt (new_stmt);
4757 return true;
4760 if (grouped_load)
4762 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4763 if (slp
4764 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()
4765 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4766 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4768 /* Check if the chain of loads is already vectorized. */
4769 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4771 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4772 return true;
4774 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4775 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4777 /* VEC_NUM is the number of vect stmts to be created for this group. */
4778 if (slp)
4780 grouped_load = false;
4781 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4782 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
4783 slp_perm = true;
4785 else
4786 vec_num = group_size;
4788 else
4790 first_stmt = stmt;
4791 first_dr = dr;
4792 group_size = vec_num = 1;
4795 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4796 gcc_assert (alignment_support_scheme);
4797 /* Targets with load-lane instructions must not require explicit
4798 realignment. */
4799 gcc_assert (!load_lanes_p
4800 || alignment_support_scheme == dr_aligned
4801 || alignment_support_scheme == dr_unaligned_supported);
 4803   /* In case the vectorization factor (VF) is bigger than the number
 4804      of elements that we can fit in a vectype (nunits), we have to generate
 4805      more than one vector stmt, i.e., we need to "unroll" the
 4806      vector stmt by a factor of VF/nunits. In doing so, we record a pointer
4807 from one copy of the vector stmt to the next, in the field
4808 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4809 stages to find the correct vector defs to be used when vectorizing
4810 stmts that use the defs of the current stmt. The example below
4811 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4812 need to create 4 vectorized stmts):
4814 before vectorization:
4815 RELATED_STMT VEC_STMT
4816 S1: x = memref - -
4817 S2: z = x + 1 - -
4819 step 1: vectorize stmt S1:
4820 We first create the vector stmt VS1_0, and, as usual, record a
4821 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4822 Next, we create the vector stmt VS1_1, and record a pointer to
4823 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4824 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4825 stmts and pointers:
4826 RELATED_STMT VEC_STMT
4827 VS1_0: vx0 = memref0 VS1_1 -
4828 VS1_1: vx1 = memref1 VS1_2 -
4829 VS1_2: vx2 = memref2 VS1_3 -
4830 VS1_3: vx3 = memref3 - -
4831 S1: x = load - VS1_0
4832 S2: z = x + 1 - -
4834 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4835 information we recorded in RELATED_STMT field is used to vectorize
4836 stmt S2. */
4838 /* In case of interleaving (non-unit grouped access):
4840 S1: x2 = &base + 2
4841 S2: x0 = &base
4842 S3: x1 = &base + 1
4843 S4: x3 = &base + 3
4845 Vectorized loads are created in the order of memory accesses
4846 starting from the access of the first stmt of the chain:
4848 VS1: vx0 = &base
4849 VS2: vx1 = &base + vec_size*1
4850 VS3: vx3 = &base + vec_size*2
4851 VS4: vx4 = &base + vec_size*3
4853 Then permutation statements are generated:
4855 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4856 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4859 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4860 (the order of the data-refs in the output of vect_permute_load_chain
4861 corresponds to the order of scalar stmts in the interleaving chain - see
4862 the documentation of vect_permute_load_chain()).
4863 The generation of permutation stmts and recording them in
4864 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4866 In case of both multiple types and interleaving, the vector loads and
4867 permutation stmts above are created for every copy. The result vector
4868 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4869 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4871 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4872 on a target that supports unaligned accesses (dr_unaligned_supported)
4873 we generate the following code:
4874 p = initial_addr;
4875 indx = 0;
4876 loop {
4877 p = p + indx * vectype_size;
4878 vec_dest = *(p);
4879 indx = indx + 1;
4882 Otherwise, the data reference is potentially unaligned on a target that
4883 does not support unaligned accesses (dr_explicit_realign_optimized) -
4884 then generate the following code, in which the data in each iteration is
4885 obtained by two vector loads, one from the previous iteration, and one
4886 from the current iteration:
4887 p1 = initial_addr;
4888 msq_init = *(floor(p1))
4889 p2 = initial_addr + VS - 1;
4890 realignment_token = call target_builtin;
4891 indx = 0;
4892 loop {
4893 p2 = p2 + indx * vectype_size
4894 lsq = *(floor(p2))
4895 vec_dest = realign_load (msq, lsq, realignment_token)
4896 indx = indx + 1;
4897 msq = lsq;
4898 } */
4900 /* If the misalignment remains the same throughout the execution of the
4901 loop, we can create the init_addr and permutation mask at the loop
4902 preheader. Otherwise, it needs to be created inside the loop.
4903 This can only occur when vectorizing memory accesses in the inner-loop
4904 nested within an outer-loop that is being vectorized. */
4906 if (nested_in_vect_loop
4907 && (TREE_INT_CST_LOW (DR_STEP (dr))
4908 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4910 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4911 compute_in_loop = true;
4914 if ((alignment_support_scheme == dr_explicit_realign_optimized
4915 || alignment_support_scheme == dr_explicit_realign)
4916 && !compute_in_loop)
4918 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4919 alignment_support_scheme, NULL_TREE,
4920 &at_loop);
4921 if (alignment_support_scheme == dr_explicit_realign_optimized)
4923 phi = SSA_NAME_DEF_STMT (msq);
4924 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4927 else
4928 at_loop = loop;
4930 if (negative)
4931 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4933 if (load_lanes_p)
4934 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4935 else
4936 aggr_type = vectype;
4938 prev_stmt_info = NULL;
4939 for (j = 0; j < ncopies; j++)
4941 /* 1. Create the vector or array pointer update chain. */
4942 if (j == 0)
4943 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4944 offset, &dummy, gsi,
4945 &ptr_incr, false, &inv_p);
4946 else
4947 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4948 TYPE_SIZE_UNIT (aggr_type));
4950 if (grouped_load || slp_perm)
4951 dr_chain.create (vec_num);
4953 if (load_lanes_p)
4955 tree vec_array;
4957 vec_array = create_vector_array (vectype, vec_num);
4959 /* Emit:
4960 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4961 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4962 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4963 gimple_call_set_lhs (new_stmt, vec_array);
4964 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4966 /* Extract each vector into an SSA_NAME. */
4967 for (i = 0; i < vec_num; i++)
4969 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4970 vec_array, i);
4971 dr_chain.quick_push (new_temp);
4974 /* Record the mapping between SSA_NAMEs and statements. */
4975 vect_record_grouped_load_vectors (stmt, dr_chain);
4977 else
4979 for (i = 0; i < vec_num; i++)
4981 if (i > 0)
4982 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4983 stmt, NULL_TREE);
4985 /* 2. Create the vector-load in the loop. */
4986 switch (alignment_support_scheme)
4988 case dr_aligned:
4989 case dr_unaligned_supported:
4991 unsigned int align, misalign;
4993 data_ref
4994 = build2 (MEM_REF, vectype, dataref_ptr,
4995 build_int_cst (reference_alias_ptr_type
4996 (DR_REF (first_dr)), 0));
4997 align = TYPE_ALIGN_UNIT (vectype);
4998 if (alignment_support_scheme == dr_aligned)
5000 gcc_assert (aligned_access_p (first_dr));
5001 misalign = 0;
5003 else if (DR_MISALIGNMENT (first_dr) == -1)
5005 TREE_TYPE (data_ref)
5006 = build_aligned_type (TREE_TYPE (data_ref),
5007 TYPE_ALIGN (elem_type));
5008 align = TYPE_ALIGN_UNIT (elem_type);
5009 misalign = 0;
5011 else
5013 TREE_TYPE (data_ref)
5014 = build_aligned_type (TREE_TYPE (data_ref),
5015 TYPE_ALIGN (elem_type));
5016 misalign = DR_MISALIGNMENT (first_dr);
5018 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5019 align, misalign);
5020 break;
5022 case dr_explicit_realign:
5024 tree ptr, bump;
5025 tree vs_minus_1;
5027 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5029 if (compute_in_loop)
5030 msq = vect_setup_realignment (first_stmt, gsi,
5031 &realignment_token,
5032 dr_explicit_realign,
5033 dataref_ptr, NULL);
5035 ptr = copy_ssa_name (dataref_ptr, NULL);
5036 new_stmt = gimple_build_assign_with_ops
5037 (BIT_AND_EXPR, ptr, dataref_ptr,
5038 build_int_cst
5039 (TREE_TYPE (dataref_ptr),
5040 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5041 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5042 data_ref
5043 = build2 (MEM_REF, vectype, ptr,
5044 build_int_cst (reference_alias_ptr_type
5045 (DR_REF (first_dr)), 0));
5046 vec_dest = vect_create_destination_var (scalar_dest,
5047 vectype);
5048 new_stmt = gimple_build_assign (vec_dest, data_ref);
5049 new_temp = make_ssa_name (vec_dest, new_stmt);
5050 gimple_assign_set_lhs (new_stmt, new_temp);
5051 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5052 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5053 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5054 msq = new_temp;
5056 bump = size_binop (MULT_EXPR, vs_minus_1,
5057 TYPE_SIZE_UNIT (elem_type));
5058 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5059 new_stmt = gimple_build_assign_with_ops
5060 (BIT_AND_EXPR, NULL_TREE, ptr,
5061 build_int_cst
5062 (TREE_TYPE (ptr),
5063 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5064 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5065 gimple_assign_set_lhs (new_stmt, ptr);
5066 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5067 data_ref
5068 = build2 (MEM_REF, vectype, ptr,
5069 build_int_cst (reference_alias_ptr_type
5070 (DR_REF (first_dr)), 0));
5071 break;
5073 case dr_explicit_realign_optimized:
5074 new_temp = copy_ssa_name (dataref_ptr, NULL);
5075 new_stmt = gimple_build_assign_with_ops
5076 (BIT_AND_EXPR, new_temp, dataref_ptr,
5077 build_int_cst
5078 (TREE_TYPE (dataref_ptr),
5079 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5080 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5081 data_ref
5082 = build2 (MEM_REF, vectype, new_temp,
5083 build_int_cst (reference_alias_ptr_type
5084 (DR_REF (first_dr)), 0));
5085 break;
5086 default:
5087 gcc_unreachable ();
5089 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5090 new_stmt = gimple_build_assign (vec_dest, data_ref);
5091 new_temp = make_ssa_name (vec_dest, new_stmt);
5092 gimple_assign_set_lhs (new_stmt, new_temp);
5093 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5095 /* 3. Handle explicit realignment if necessary/supported.
5096 Create in loop:
5097 vec_dest = realign_load (msq, lsq, realignment_token) */
5098 if (alignment_support_scheme == dr_explicit_realign_optimized
5099 || alignment_support_scheme == dr_explicit_realign)
5101 lsq = gimple_assign_lhs (new_stmt);
5102 if (!realignment_token)
5103 realignment_token = dataref_ptr;
5104 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5105 new_stmt
5106 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5107 vec_dest, msq, lsq,
5108 realignment_token);
5109 new_temp = make_ssa_name (vec_dest, new_stmt);
5110 gimple_assign_set_lhs (new_stmt, new_temp);
5111 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5113 if (alignment_support_scheme == dr_explicit_realign_optimized)
5115 gcc_assert (phi);
5116 if (i == vec_num - 1 && j == ncopies - 1)
5117 add_phi_arg (phi, lsq,
5118 loop_latch_edge (containing_loop),
5119 UNKNOWN_LOCATION);
5120 msq = lsq;
5124 /* 4. Handle invariant-load. */
5125 if (inv_p && !bb_vinfo)
5127 gimple_stmt_iterator gsi2 = *gsi;
5128 gcc_assert (!grouped_load);
5129 gsi_next (&gsi2);
5130 new_temp = vect_init_vector (stmt, scalar_dest,
5131 vectype, &gsi2);
5132 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5135 if (negative)
5137 tree perm_mask = perm_mask_for_reverse (vectype);
5138 new_temp = permute_vec_elements (new_temp, new_temp,
5139 perm_mask, stmt, gsi);
5140 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5143 /* Collect vector loads and later create their permutation in
5144 vect_transform_grouped_load (). */
5145 if (grouped_load || slp_perm)
5146 dr_chain.quick_push (new_temp);
5148 /* Store vector loads in the corresponding SLP_NODE. */
5149 if (slp && !slp_perm)
5150 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5154 if (slp && !slp_perm)
5155 continue;
5157 if (slp_perm)
5159 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5160 slp_node_instance, false))
5162 dr_chain.release ();
5163 return false;
5166 else
5168 if (grouped_load)
5170 if (!load_lanes_p)
5171 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5172 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5174 else
5176 if (j == 0)
5177 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5178 else
5179 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5180 prev_stmt_info = vinfo_for_stmt (new_stmt);
5183 dr_chain.release ();
5186 return true;
5189 /* Function vect_is_simple_cond.
5191 Input:
5192 LOOP - the loop that is being vectorized.
5193 COND - Condition that is checked for simple use.
5195 Output:
5196 *COMP_VECTYPE - the vector type for the comparison.
5198 Returns whether a COND can be vectorized. Checks whether
 5199    condition operands are supportable using vect_is_simple_use.  */
5201 static bool
5202 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5203 bb_vec_info bb_vinfo, tree *comp_vectype)
5205 tree lhs, rhs;
5206 tree def;
5207 enum vect_def_type dt;
5208 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5210 if (!COMPARISON_CLASS_P (cond))
5211 return false;
5213 lhs = TREE_OPERAND (cond, 0);
5214 rhs = TREE_OPERAND (cond, 1);
5216 if (TREE_CODE (lhs) == SSA_NAME)
5218 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5219 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5220 &lhs_def_stmt, &def, &dt, &vectype1))
5221 return false;
5223 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5224 && TREE_CODE (lhs) != FIXED_CST)
5225 return false;
5227 if (TREE_CODE (rhs) == SSA_NAME)
5229 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5230 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5231 &rhs_def_stmt, &def, &dt, &vectype2))
5232 return false;
5234 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5235 && TREE_CODE (rhs) != FIXED_CST)
5236 return false;
5238 *comp_vectype = vectype1 ? vectype1 : vectype2;
5239 return true;
5242 /* vectorizable_condition.
5244 Check if STMT is conditional modify expression that can be vectorized.
5245 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5246 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5247 at GSI.
 5249    When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 5250    to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 5251    the else clause if it is 2).
5253 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5255 bool
5256 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5257 gimple *vec_stmt, tree reduc_def, int reduc_index,
5258 slp_tree slp_node)
5260 tree scalar_dest = NULL_TREE;
5261 tree vec_dest = NULL_TREE;
5262 tree cond_expr, then_clause, else_clause;
5263 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5264 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5265 tree comp_vectype = NULL_TREE;
5266 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5267 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5268 tree vec_compare, vec_cond_expr;
5269 tree new_temp;
5270 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5271 tree def;
5272 enum vect_def_type dt, dts[4];
5273 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5274 int ncopies;
5275 enum tree_code code;
5276 stmt_vec_info prev_stmt_info = NULL;
5277 int i, j;
5278 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5279 vec<tree> vec_oprnds0 = vNULL;
5280 vec<tree> vec_oprnds1 = vNULL;
5281 vec<tree> vec_oprnds2 = vNULL;
5282 vec<tree> vec_oprnds3 = vNULL;
5283 tree vec_cmp_type = vectype;
5285 if (slp_node || PURE_SLP_STMT (stmt_info))
5286 ncopies = 1;
5287 else
5288 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5290 gcc_assert (ncopies >= 1);
5291 if (reduc_index && ncopies > 1)
5292 return false; /* FORNOW */
5294 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5295 return false;
5297 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5298 return false;
5300 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5301 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5302 && reduc_def))
5303 return false;
5305 /* FORNOW: not yet supported. */
5306 if (STMT_VINFO_LIVE_P (stmt_info))
5308 if (dump_enabled_p ())
5309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5310 "value used after loop.");
5311 return false;
5314 /* Is vectorizable conditional operation? */
5315 if (!is_gimple_assign (stmt))
5316 return false;
5318 code = gimple_assign_rhs_code (stmt);
5320 if (code != COND_EXPR)
5321 return false;
5323 cond_expr = gimple_assign_rhs1 (stmt);
5324 then_clause = gimple_assign_rhs2 (stmt);
5325 else_clause = gimple_assign_rhs3 (stmt);
5327 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5328 &comp_vectype)
5329 || !comp_vectype)
5330 return false;
5332 if (TREE_CODE (then_clause) == SSA_NAME)
5334 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5335 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5336 &then_def_stmt, &def, &dt))
5337 return false;
5339 else if (TREE_CODE (then_clause) != INTEGER_CST
5340 && TREE_CODE (then_clause) != REAL_CST
5341 && TREE_CODE (then_clause) != FIXED_CST)
5342 return false;
5344 if (TREE_CODE (else_clause) == SSA_NAME)
5346 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5347 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5348 &else_def_stmt, &def, &dt))
5349 return false;
5351 else if (TREE_CODE (else_clause) != INTEGER_CST
5352 && TREE_CODE (else_clause) != REAL_CST
5353 && TREE_CODE (else_clause) != FIXED_CST)
5354 return false;
5356 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
5358 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5359 tree cmp_type = build_nonstandard_integer_type (prec, 1);
5360 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5361 if (vec_cmp_type == NULL_TREE)
5362 return false;
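      /* For example, for a vector of 32-bit floats the comparison below is
	 built in a same-sized vector of 32-bit unsigned integers
	 (VEC_CMP_TYPE), while the VEC_COND_EXPR itself still selects values
	 in the original VECTYPE.  */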
5365 if (!vec_stmt)
5367 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5368 return expand_vec_cond_expr_p (vectype, comp_vectype);
5371 /* Transform. */
5373 if (!slp_node)
5375 vec_oprnds0.create (1);
5376 vec_oprnds1.create (1);
5377 vec_oprnds2.create (1);
5378 vec_oprnds3.create (1);
5381 /* Handle def. */
5382 scalar_dest = gimple_assign_lhs (stmt);
5383 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5385 /* Handle cond expr. */
5386 for (j = 0; j < ncopies; j++)
5388 gimple new_stmt = NULL;
5389 if (j == 0)
5391 if (slp_node)
5393 vec<tree> ops;
5394 ops.create (4);
5395 vec<slp_void_p> vec_defs;
5397 vec_defs.create (4);
5398 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5399 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5400 ops.safe_push (then_clause);
5401 ops.safe_push (else_clause);
5402 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5403 vec_oprnds3 = *((vec<tree> *) vec_defs.pop ());
5404 vec_oprnds2 = *((vec<tree> *) vec_defs.pop ());
5405 vec_oprnds1 = *((vec<tree> *) vec_defs.pop ());
5406 vec_oprnds0 = *((vec<tree> *) vec_defs.pop ());
5408 ops.release ();
5409 vec_defs.release ();
5411 else
5413 gimple gtemp;
5414 vec_cond_lhs =
5415 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5416 stmt, NULL);
5417 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5418 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5420 vec_cond_rhs =
5421 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5422 stmt, NULL);
5423 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5424 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5425 if (reduc_index == 1)
5426 vec_then_clause = reduc_def;
5427 else
5429 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5430 stmt, NULL);
5431 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5432 NULL, &gtemp, &def, &dts[2]);
5434 if (reduc_index == 2)
5435 vec_else_clause = reduc_def;
5436 else
5438 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5439 stmt, NULL);
5440 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5441 NULL, &gtemp, &def, &dts[3]);
5445 else
5447 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5448 vec_oprnds0.pop ());
5449 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5450 vec_oprnds1.pop ());
5451 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5452 vec_oprnds2.pop ());
5453 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5454 vec_oprnds3.pop ());
5457 if (!slp_node)
5459 vec_oprnds0.quick_push (vec_cond_lhs);
5460 vec_oprnds1.quick_push (vec_cond_rhs);
5461 vec_oprnds2.quick_push (vec_then_clause);
5462 vec_oprnds3.quick_push (vec_else_clause);
5465 /* Arguments are ready. Create the new vector stmt. */
5466 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5468 vec_cond_rhs = vec_oprnds1[i];
5469 vec_then_clause = vec_oprnds2[i];
5470 vec_else_clause = vec_oprnds3[i];
5472 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5473 vec_cond_lhs, vec_cond_rhs);
5474 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5475 vec_compare, vec_then_clause, vec_else_clause);
5477 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5478 new_temp = make_ssa_name (vec_dest, new_stmt);
5479 gimple_assign_set_lhs (new_stmt, new_temp);
5480 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5481 if (slp_node)
5482 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5485 if (slp_node)
5486 continue;
5488 if (j == 0)
5489 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5490 else
5491 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5493 prev_stmt_info = vinfo_for_stmt (new_stmt);
5496 vec_oprnds0.release ();
5497 vec_oprnds1.release ();
5498 vec_oprnds2.release ();
5499 vec_oprnds3.release ();
5501 return true;
5505 /* Make sure the statement is vectorizable. */
5507 bool
5508 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5510 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5511 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5512 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5513 bool ok;
5514 tree scalar_type, vectype;
5515 gimple pattern_stmt;
5516 gimple_seq pattern_def_seq;
5518 if (dump_enabled_p ())
5520 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5521 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5524 if (gimple_has_volatile_ops (stmt))
5526 if (dump_enabled_p ())
5527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5528 "not vectorized: stmt has volatile operands");
5530 return false;
5533 /* Skip stmts that do not need to be vectorized. In loops this is expected
5534 to include:
5535 - the COND_EXPR which is the loop exit condition
5536 - any LABEL_EXPRs in the loop
5537 - computations that are used only for array indexing or loop control.
5538 In basic blocks we only analyze statements that are a part of some SLP
5539 instance, therefore, all the statements are relevant.
 5541      A pattern statement needs to be analyzed instead of the original statement
 5542      if the original statement is not relevant. Otherwise, we analyze both
 5543      statements. In basic blocks we are called from some SLP instance
 5544      traversal; there we do not analyze pattern stmts instead of the original
 5545      ones, because the pattern stmts will already be part of the SLP instance.  */
5547 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5548 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5549 && !STMT_VINFO_LIVE_P (stmt_info))
5551 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5552 && pattern_stmt
5553 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5554 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5556 /* Analyze PATTERN_STMT instead of the original stmt. */
5557 stmt = pattern_stmt;
5558 stmt_info = vinfo_for_stmt (pattern_stmt);
5559 if (dump_enabled_p ())
5561 dump_printf_loc (MSG_NOTE, vect_location,
5562 "==> examining pattern statement: ");
5563 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5566 else
5568 if (dump_enabled_p ())
5569 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
5571 return true;
5574 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5575 && node == NULL
5576 && pattern_stmt
5577 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5578 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5580 /* Analyze PATTERN_STMT too. */
5581 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_NOTE, vect_location,
5584 "==> examining pattern statement: ");
5585 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5588 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5589 return false;
5592 if (is_pattern_stmt_p (stmt_info)
5593 && node == NULL
5594 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5596 gimple_stmt_iterator si;
5598 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5600 gimple pattern_def_stmt = gsi_stmt (si);
5601 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5602 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5604 /* Analyze def stmt of STMT if it's a pattern stmt. */
5605 if (dump_enabled_p ())
5607 dump_printf_loc (MSG_NOTE, vect_location,
5608 "==> examining pattern def statement: ");
5609 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5612 if (!vect_analyze_stmt (pattern_def_stmt,
5613 need_to_vectorize, node))
5614 return false;
5619 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5621 case vect_internal_def:
5622 break;
5624 case vect_reduction_def:
5625 case vect_nested_cycle:
5626 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5627 || relevance == vect_used_in_outer_by_reduction
5628 || relevance == vect_unused_in_scope));
5629 break;
5631 case vect_induction_def:
5632 case vect_constant_def:
5633 case vect_external_def:
5634 case vect_unknown_def_type:
5635 default:
5636 gcc_unreachable ();
5639 if (bb_vinfo)
5641 gcc_assert (PURE_SLP_STMT (stmt_info));
5643 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5644 if (dump_enabled_p ())
5646 dump_printf_loc (MSG_NOTE, vect_location,
5647 "get vectype for scalar type: ");
5648 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5651 vectype = get_vectype_for_scalar_type (scalar_type);
5652 if (!vectype)
5654 if (dump_enabled_p ())
5656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5657 "not SLPed: unsupported data-type ");
5658 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5659 scalar_type);
5661 return false;
5664 if (dump_enabled_p ())
5666 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5667 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5670 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5673 if (STMT_VINFO_RELEVANT_P (stmt_info))
5675 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5676 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5677 *need_to_vectorize = true;
5680 ok = true;
5681 if (!bb_vinfo
5682 && (STMT_VINFO_RELEVANT_P (stmt_info)
5683 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5684 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5685 || vectorizable_shift (stmt, NULL, NULL, NULL)
5686 || vectorizable_operation (stmt, NULL, NULL, NULL)
5687 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5688 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5689 || vectorizable_call (stmt, NULL, NULL, NULL)
5690 || vectorizable_store (stmt, NULL, NULL, NULL)
5691 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5692 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5693 else
5695 if (bb_vinfo)
5696 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5697 || vectorizable_shift (stmt, NULL, NULL, node)
5698 || vectorizable_operation (stmt, NULL, NULL, node)
5699 || vectorizable_assignment (stmt, NULL, NULL, node)
5700 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5701 || vectorizable_call (stmt, NULL, NULL, node)
5702 || vectorizable_store (stmt, NULL, NULL, node)
5703 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5706 if (!ok)
5708 if (dump_enabled_p ())
5710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5711 "not vectorized: relevant stmt not ");
5712 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5713 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5716 return false;
5719 if (bb_vinfo)
5720 return true;
5722 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
5723 need extra handling, except for vectorizable reductions. */
5724 if (STMT_VINFO_LIVE_P (stmt_info)
5725 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5726 ok = vectorizable_live_operation (stmt, NULL, NULL);
5728 if (!ok)
5730 if (dump_enabled_p ())
5732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733 "not vectorized: live stmt not ");
5734 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5735 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5738 return false;
5741 return true;
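/* Illustrative example of a "live" stmt as handled just above (the loop is
   hypothetical):

	for (i = 0; i < n; i++)
	  { a[i] = b[i] + 1; last = 2 * i; }
	use (last);

   The assignment to LAST is live because its scalar result is used after
   the loop; besides the regular vectorizable_* checks it also needs
   vectorizable_live_operation, which must be able to recompute the final
   scalar value (reductions are excluded since they handle their own
   epilogue).  */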
5745 /* Function vect_transform_stmt.
5747 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5749 bool
5750 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5751 bool *grouped_store, slp_tree slp_node,
5752 slp_instance slp_node_instance)
5754 bool is_store = false;
5755 gimple vec_stmt = NULL;
5756 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5757 bool done;
5759 switch (STMT_VINFO_TYPE (stmt_info))
5761 case type_demotion_vec_info_type:
5762 case type_promotion_vec_info_type:
5763 case type_conversion_vec_info_type:
5764 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5765 gcc_assert (done);
5766 break;
5768 case induc_vec_info_type:
5769 gcc_assert (!slp_node);
5770 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5771 gcc_assert (done);
5772 break;
5774 case shift_vec_info_type:
5775 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5776 gcc_assert (done);
5777 break;
5779 case op_vec_info_type:
5780 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5781 gcc_assert (done);
5782 break;
5784 case assignment_vec_info_type:
5785 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5786 gcc_assert (done);
5787 break;
5789 case load_vec_info_type:
5790 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5791 slp_node_instance);
5792 gcc_assert (done);
5793 break;
5795 case store_vec_info_type:
5796 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5797 gcc_assert (done);
5798 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5800 /* In case of interleaving, the whole chain is vectorized when the
5801 last store in the chain is reached. Store stmts before the last
5802 one are skipped, and their stmt_vec_info shouldn't be freed
5803 meanwhile. */
5804 *grouped_store = true;
5805 if (STMT_VINFO_VEC_STMT (stmt_info))
5806 is_store = true;
5808 else
5809 is_store = true;
5810 break;
5812 case condition_vec_info_type:
5813 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5814 gcc_assert (done);
5815 break;
5817 case call_vec_info_type:
5818 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5819 stmt = gsi_stmt (*gsi);
5820 break;
5822 case reduc_vec_info_type:
5823 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5824 gcc_assert (done);
5825 break;
5827 default:
5828 if (!STMT_VINFO_LIVE_P (stmt_info))
5830 if (dump_enabled_p ())
5831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5832 "stmt not supported.");
5833 gcc_unreachable ();
5837 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5838 is being vectorized, but outside the immediately enclosing loop. */
5839 if (vec_stmt
5840 && STMT_VINFO_LOOP_VINFO (stmt_info)
5841 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5842 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5843 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5844 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5845 || STMT_VINFO_RELEVANT (stmt_info) ==
5846 vect_used_in_outer_by_reduction))
5848 struct loop *innerloop = LOOP_VINFO_LOOP (
5849 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5850 imm_use_iterator imm_iter;
5851 use_operand_p use_p;
5852 tree scalar_dest;
5853 gimple exit_phi;
5855 if (dump_enabled_p ())
5856 dump_printf_loc (MSG_NOTE, vect_location,
5857 "Record the vdef for outer-loop vectorization.");
5859 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5860 (to be used when vectorizing outer-loop stmts that use the DEF of
5861 STMT). */
5862 if (gimple_code (stmt) == GIMPLE_PHI)
5863 scalar_dest = PHI_RESULT (stmt);
5864 else
5865 scalar_dest = gimple_assign_lhs (stmt);
5867 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5869 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5871 exit_phi = USE_STMT (use_p);
5872 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5877 /* Handle stmts whose DEF is used outside the loop-nest that is
5878 being vectorized. */
5879 if (STMT_VINFO_LIVE_P (stmt_info)
5880 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5882 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5883 gcc_assert (done);
5886 if (vec_stmt)
5887 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5889 return is_store;
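/* Illustrative sketch of the grouped-store protocol above (the group size
   is hypothetical): for an interleaved group of four stores S1..S4, vector
   code is emitted only when the last element S4 is reached.  For S1..S3
   vect_transform_stmt just sets *GROUPED_STORE; the caller is then expected
   to walk the chain (GROUP_FIRST_ELEMENT/GROUP_NEXT_ELEMENT) and remove the
   scalar stores, e.g. via vect_remove_stores below, which is why their
   stmt_vec_info must stay alive until the whole chain has been
   transformed.  */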
5893 /* Remove a group of stores (for SLP or interleaving), free their
5894 stmt_vec_info. */
5896 void
5897 vect_remove_stores (gimple first_stmt)
5899 gimple next = first_stmt;
5900 gimple tmp;
5901 gimple_stmt_iterator next_si;
5903 while (next)
5905 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5907 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5908 if (is_pattern_stmt_p (stmt_info))
5909 next = STMT_VINFO_RELATED_STMT (stmt_info);
5910 /* Free the attached stmt_vec_info and remove the stmt. */
5911 next_si = gsi_for_stmt (next);
5912 unlink_stmt_vdef (next);
5913 gsi_remove (&next_si, true);
5914 release_defs (next);
5915 free_stmt_vec_info (next);
5916 next = tmp;
5921 /* Function new_stmt_vec_info.
5923 Create and initialize a new stmt_vec_info struct for STMT. */
5925 stmt_vec_info
5926 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5927 bb_vec_info bb_vinfo)
5929 stmt_vec_info res;
5930 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5932 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5933 STMT_VINFO_STMT (res) = stmt;
5934 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5935 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5936 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5937 STMT_VINFO_LIVE_P (res) = false;
5938 STMT_VINFO_VECTYPE (res) = NULL;
5939 STMT_VINFO_VEC_STMT (res) = NULL;
5940 STMT_VINFO_VECTORIZABLE (res) = true;
5941 STMT_VINFO_IN_PATTERN_P (res) = false;
5942 STMT_VINFO_RELATED_STMT (res) = NULL;
5943 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5944 STMT_VINFO_DATA_REF (res) = NULL;
5946 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5947 STMT_VINFO_DR_OFFSET (res) = NULL;
5948 STMT_VINFO_DR_INIT (res) = NULL;
5949 STMT_VINFO_DR_STEP (res) = NULL;
5950 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5952 if (gimple_code (stmt) == GIMPLE_PHI
5953 && is_loop_header_bb_p (gimple_bb (stmt)))
5954 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5955 else
5956 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5958 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
5959 STMT_SLP_TYPE (res) = loop_vect;
5960 GROUP_FIRST_ELEMENT (res) = NULL;
5961 GROUP_NEXT_ELEMENT (res) = NULL;
5962 GROUP_SIZE (res) = 0;
5963 GROUP_STORE_COUNT (res) = 0;
5964 GROUP_GAP (res) = 0;
5965 GROUP_SAME_DR_STMT (res) = NULL;
5966 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5968 return res;
5972 /* Create a vector for stmt_vec_info. */
5974 void
5975 init_stmt_vec_info_vec (void)
5977 gcc_assert (!stmt_vec_info_vec.exists ());
5978 stmt_vec_info_vec.create (50);
5982 /* Free the vector of stmt_vec_info. */
5984 void
5985 free_stmt_vec_info_vec (void)
5987 gcc_assert (stmt_vec_info_vec.exists ());
5988 stmt_vec_info_vec.release ();
5992 /* Free stmt vectorization related info. */
5994 void
5995 free_stmt_vec_info (gimple stmt)
5997 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5999 if (!stmt_info)
6000 return;
6002 /* Check if this statement has a related "pattern stmt"
6003 (introduced by the vectorizer during the pattern recognition
6004 pass). Free the pattern's stmt_vec_info and the def stmts'
6005 stmt_vec_infos too. */
6006 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6008 stmt_vec_info patt_info
6009 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6010 if (patt_info)
6012 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6013 if (seq)
6015 gimple_stmt_iterator si;
6016 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6017 free_stmt_vec_info (gsi_stmt (si));
6019 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6023 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6024 set_vinfo_for_stmt (stmt, NULL);
6025 free (stmt_info);
6029 /* Function get_vectype_for_scalar_type_and_size.
6031 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6032 by the target. */
6034 static tree
6035 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6037 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6038 enum machine_mode simd_mode;
6039 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6040 int nunits;
6041 tree vectype;
6043 if (nbytes == 0)
6044 return NULL_TREE;
6046 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6047 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6048 return NULL_TREE;
6050 /* For vector types of elements whose mode precision doesn't
6051 match their type's precision we use an element type of mode
6052 precision. The vectorization routines will have to make sure
6053 they support the proper result truncation/extension.
6054 We also make sure to build vector types with INTEGER_TYPE
6055 component type only. */
6056 if (INTEGRAL_TYPE_P (scalar_type)
6057 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6058 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6059 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6060 TYPE_UNSIGNED (scalar_type));
6062 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6063 When the component mode passes the above test simply use a type
6064 corresponding to that mode. The theory is that any use that
6065 would cause problems with this will disable vectorization anyway. */
6066 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6067 && !INTEGRAL_TYPE_P (scalar_type)
6068 && !POINTER_TYPE_P (scalar_type))
6069 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6071 /* We can't build a vector type of elements with alignment bigger than
6072 their size. */
6073 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6074 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6076 /* If we fell back to using the mode, fail if there was
6077 no scalar type for it. */
6078 if (scalar_type == NULL_TREE)
6079 return NULL_TREE;
6081 /* If no size was supplied, use the mode the target prefers. Otherwise
6082 look up a vector mode of the specified size. */
6083 if (size == 0)
6084 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6085 else
6086 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6087 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6088 if (nunits <= 1)
6089 return NULL_TREE;
6091 vectype = build_vector_type (scalar_type, nunits);
6092 if (dump_enabled_p ())
6094 dump_printf_loc (MSG_NOTE, vect_location,
6095 "get vectype with %d units of type ", nunits);
6096 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
6099 if (!vectype)
6100 return NULL_TREE;
6102 if (dump_enabled_p ())
6104 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6105 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
6108 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6109 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6111 if (dump_enabled_p ())
6112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6113 "mode not supported by target.");
6114 return NULL_TREE;
6117 return vectype;
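/* Worked example (illustrative; the 16-byte SIMD width is an assumption
   about the target): for SCALAR_TYPE == int we get nbytes == 4, and with
   SIZE == 0 a preferred SIMD mode of 16 bytes gives
   nunits == 16 / 4 == 4, i.e. a 4 x int vector type.  For a C _Bool
   (precision 1, QImode) the precision check above first rebuilds the
   element type as an 8-bit INTEGER_TYPE before the vector type is
   built.  */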
6120 unsigned int current_vector_size;
6122 /* Function get_vectype_for_scalar_type.
6124 Returns the vector type corresponding to SCALAR_TYPE as supported
6125 by the target. */
6127 tree
6128 get_vectype_for_scalar_type (tree scalar_type)
6130 tree vectype;
6131 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6132 current_vector_size);
6133 if (vectype
6134 && current_vector_size == 0)
6135 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6136 return vectype;
6139 /* Function get_same_sized_vectype
6141 Returns a vector type corresponding to SCALAR_TYPE of size
6142 VECTOR_TYPE if supported by the target. */
6144 tree
6145 get_same_sized_vectype (tree scalar_type, tree vector_type)
6147 return get_vectype_for_scalar_type_and_size
6148 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6151 /* Function vect_is_simple_use.
6153 Input:
6154 LOOP_VINFO - the vect info of the loop that is being vectorized.
6155 BB_VINFO - the vect info of the basic block that is being vectorized.
6156 OPERAND - operand of STMT in the loop or bb.
6157 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6159 Returns whether a stmt with OPERAND can be vectorized.
6160 For loops, supportable operands are constants, loop invariants, and operands
6161 that are defined by the current iteration of the loop. Unsupportable
6162 operands are those that are defined by a previous iteration of the loop (as
6163 is the case in reduction/induction computations).
6164 For basic blocks, supportable operands are constants and bb invariants.
6165 For now, operands defined outside the basic block are not supported. */
6167 bool
6168 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6169 bb_vec_info bb_vinfo, gimple *def_stmt,
6170 tree *def, enum vect_def_type *dt)
6172 basic_block bb;
6173 stmt_vec_info stmt_vinfo;
6174 struct loop *loop = NULL;
6176 if (loop_vinfo)
6177 loop = LOOP_VINFO_LOOP (loop_vinfo);
6179 *def_stmt = NULL;
6180 *def = NULL_TREE;
6182 if (dump_enabled_p ())
6184 dump_printf_loc (MSG_NOTE, vect_location,
6185 "vect_is_simple_use: operand ");
6186 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6189 if (CONSTANT_CLASS_P (operand))
6191 *dt = vect_constant_def;
6192 return true;
6195 if (is_gimple_min_invariant (operand))
6197 *def = operand;
6198 *dt = vect_external_def;
6199 return true;
6202 if (TREE_CODE (operand) == PAREN_EXPR)
6204 if (dump_enabled_p ())
6205 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
6206 operand = TREE_OPERAND (operand, 0);
6209 if (TREE_CODE (operand) != SSA_NAME)
6211 if (dump_enabled_p ())
6212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6213 "not ssa-name.");
6214 return false;
6217 *def_stmt = SSA_NAME_DEF_STMT (operand);
6218 if (*def_stmt == NULL)
6220 if (dump_enabled_p ())
6221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6222 "no def_stmt.");
6223 return false;
6226 if (dump_enabled_p ())
6228 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6229 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6232 /* An empty stmt is expected only in the case of a function argument.
6233 (Otherwise we expect a phi_node or a GIMPLE_ASSIGN). */
6234 if (gimple_nop_p (*def_stmt))
6236 *def = operand;
6237 *dt = vect_external_def;
6238 return true;
6241 bb = gimple_bb (*def_stmt);
6243 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6244 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6245 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6246 *dt = vect_external_def;
6247 else
6249 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6250 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6253 if (*dt == vect_unknown_def_type
6254 || (stmt
6255 && *dt == vect_double_reduction_def
6256 && gimple_code (stmt) != GIMPLE_PHI))
6258 if (dump_enabled_p ())
6259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6260 "Unsupported pattern.");
6261 return false;
6264 if (dump_enabled_p ())
6265 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6267 switch (gimple_code (*def_stmt))
6269 case GIMPLE_PHI:
6270 *def = gimple_phi_result (*def_stmt);
6271 break;
6273 case GIMPLE_ASSIGN:
6274 *def = gimple_assign_lhs (*def_stmt);
6275 break;
6277 case GIMPLE_CALL:
6278 *def = gimple_call_lhs (*def_stmt);
6279 if (*def != NULL)
6280 break;
6281 /* FALLTHRU */
6282 default:
6283 if (dump_enabled_p ())
6284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6285 "unsupported defining stmt: ");
6286 return false;
6289 return true;
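/* Illustrative example of the classification above, for a loop stmt such
   as a[i] = b[i] * x + 3 (names hypothetical): the constant 3 yields
   vect_constant_def, the loop-invariant X yields vect_external_def, and
   the value loaded from b[i], defined by a stmt inside the loop, yields
   vect_internal_def.  An operand defined by an induction or reduction PHI
   instead gets vect_induction_def/vect_reduction_def from the def stmt's
   stmt_vec_info.  */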
6292 /* Function vect_is_simple_use_1.
6294 Same as vect_is_simple_use but also determines the vector operand
6295 type of OPERAND and stores it to *VECTYPE. If the definition of
6296 OPERAND is vect_uninitialized_def, vect_constant_def or
6297 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6298 is responsible for computing the best suited vector type for the
6299 scalar operand. */
6301 bool
6302 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6303 bb_vec_info bb_vinfo, gimple *def_stmt,
6304 tree *def, enum vect_def_type *dt, tree *vectype)
6306 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6307 def, dt))
6308 return false;
6310 /* Now get a vector type if the def is internal, otherwise supply
6311 NULL_TREE and leave it up to the caller to figure out a proper
6312 type for the use stmt. */
6313 if (*dt == vect_internal_def
6314 || *dt == vect_induction_def
6315 || *dt == vect_reduction_def
6316 || *dt == vect_double_reduction_def
6317 || *dt == vect_nested_cycle)
6319 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6321 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6322 && !STMT_VINFO_RELEVANT (stmt_info)
6323 && !STMT_VINFO_LIVE_P (stmt_info))
6324 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6326 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6327 gcc_assert (*vectype != NULL_TREE);
6329 else if (*dt == vect_uninitialized_def
6330 || *dt == vect_constant_def
6331 || *dt == vect_external_def)
6332 *vectype = NULL_TREE;
6333 else
6334 gcc_unreachable ();
6336 return true;
6340 /* Function supportable_widening_operation
6342 Check whether an operation represented by the code CODE is a
6343 widening operation that is supported by the target platform in
6344 vector form (i.e., when operating on arguments of type VECTYPE_IN
6345 producing a result of type VECTYPE_OUT).
6347 Widening operations we currently support are NOP (CONVERT), FLOAT
6348 and WIDEN_MULT. This function checks if these operations are supported
6349 by the target platform either directly (via vector tree-codes), or via
6350 target builtins.
6352 Output:
6353 - CODE1 and CODE2 are codes of vector operations to be used when
6354 vectorizing the operation, if available.
6355 - MULTI_STEP_CVT determines the number of required intermediate steps in
6356 case of multi-step conversion (like char->short->int - in that case
6357 MULTI_STEP_CVT will be 1).
6358 - INTERM_TYPES contains the intermediate type required to perform the
6359 widening operation (short in the above example). */
6361 bool
6362 supportable_widening_operation (enum tree_code code, gimple stmt,
6363 tree vectype_out, tree vectype_in,
6364 enum tree_code *code1, enum tree_code *code2,
6365 int *multi_step_cvt,
6366 vec<tree> *interm_types)
6368 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6369 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6370 struct loop *vect_loop = NULL;
6371 enum machine_mode vec_mode;
6372 enum insn_code icode1, icode2;
6373 optab optab1, optab2;
6374 tree vectype = vectype_in;
6375 tree wide_vectype = vectype_out;
6376 enum tree_code c1, c2;
6377 int i;
6378 tree prev_type, intermediate_type;
6379 enum machine_mode intermediate_mode, prev_mode;
6380 optab optab3, optab4;
6382 *multi_step_cvt = 0;
6383 if (loop_info)
6384 vect_loop = LOOP_VINFO_LOOP (loop_info);
6386 switch (code)
6388 case WIDEN_MULT_EXPR:
6389 /* The result of a vectorized widening operation usually requires
6390 two vectors (because the widened results do not fit into one vector).
6391 The generated vector results would normally be expected to be
6392 generated in the same order as in the original scalar computation,
6393 i.e. if 8 results are generated in each vector iteration, they are
6394 to be organized as follows:
6395 vect1: [res1,res2,res3,res4],
6396 vect2: [res5,res6,res7,res8].
6398 However, in the special case that the result of the widening
6399 operation is used in a reduction computation only, the order doesn't
6400 matter (because when vectorizing a reduction we change the order of
6401 the computation). Some targets can take advantage of this and
6402 generate more efficient code. For example, targets like Altivec,
6403 that support widen_mult using a sequence of {mult_even,mult_odd}
6404 generate the following vectors:
6405 vect1: [res1,res3,res5,res7],
6406 vect2: [res2,res4,res6,res8].
6408 When vectorizing outer-loops, we execute the inner-loop sequentially
6409 (each vectorized inner-loop iteration contributes to VF outer-loop
6410 iterations in parallel). We therefore don't allow changing the
6411 order of the computation in the inner-loop during outer-loop
6412 vectorization. */
6413 /* TODO: Another case in which order doesn't *really* matter is when we
6414 widen and then contract again, e.g. (short)((int)x * y >> 8).
6415 Normally, pack_trunc performs an even/odd permute, whereas the
6416 repack from an even/odd expansion would be an interleave, which
6417 would be significantly simpler for e.g. AVX2. */
6418 /* In any case, in order to avoid duplicating the code below, recurse
6419 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6420 are properly set up for the caller. If we fail, we'll continue with
6421 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6422 if (vect_loop
6423 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6424 && !nested_in_vect_loop_p (vect_loop, stmt)
6425 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6426 stmt, vectype_out, vectype_in,
6427 code1, code2, multi_step_cvt,
6428 interm_types))
6429 return true;
6430 c1 = VEC_WIDEN_MULT_LO_EXPR;
6431 c2 = VEC_WIDEN_MULT_HI_EXPR;
6432 break;
6434 case VEC_WIDEN_MULT_EVEN_EXPR:
6435 /* Support the recursion induced just above. */
6436 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6437 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6438 break;
6440 case WIDEN_LSHIFT_EXPR:
6441 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6442 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6443 break;
6445 CASE_CONVERT:
6446 c1 = VEC_UNPACK_LO_EXPR;
6447 c2 = VEC_UNPACK_HI_EXPR;
6448 break;
6450 case FLOAT_EXPR:
6451 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6452 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6453 break;
6455 case FIX_TRUNC_EXPR:
6456 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6457 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6458 computing the operation. */
6459 return false;
6461 default:
6462 gcc_unreachable ();
6465 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6467 enum tree_code ctmp = c1;
6468 c1 = c2;
6469 c2 = ctmp;
6472 if (code == FIX_TRUNC_EXPR)
6474 /* The signedness is determined from the output operand. */
6475 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6476 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6478 else
6480 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6481 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6484 if (!optab1 || !optab2)
6485 return false;
6487 vec_mode = TYPE_MODE (vectype);
6488 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6489 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6490 return false;
6492 *code1 = c1;
6493 *code2 = c2;
6495 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6496 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6497 return true;
6499 /* Check if it's a multi-step conversion that can be done using intermediate
6500 types. */
6502 prev_type = vectype;
6503 prev_mode = vec_mode;
6505 if (!CONVERT_EXPR_CODE_P (code))
6506 return false;
6508 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6509 intermediate steps in the promotion sequence. We try
6510 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6511 not. */
6512 interm_types->create (MAX_INTERM_CVT_STEPS);
6513 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6515 intermediate_mode = insn_data[icode1].operand[0].mode;
6516 intermediate_type
6517 = lang_hooks.types.type_for_mode (intermediate_mode,
6518 TYPE_UNSIGNED (prev_type));
6519 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6520 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6522 if (!optab3 || !optab4
6523 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6524 || insn_data[icode1].operand[0].mode != intermediate_mode
6525 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6526 || insn_data[icode2].operand[0].mode != intermediate_mode
6527 || ((icode1 = optab_handler (optab3, intermediate_mode))
6528 == CODE_FOR_nothing)
6529 || ((icode2 = optab_handler (optab4, intermediate_mode))
6530 == CODE_FOR_nothing))
6531 break;
6533 interm_types->quick_push (intermediate_type);
6534 (*multi_step_cvt)++;
6536 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6537 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6538 return true;
6540 prev_type = intermediate_type;
6541 prev_mode = intermediate_mode;
6544 interm_types->release ();
6545 return false;
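/* Worked example (illustrative, target-dependent): a widening multiply of
   two char vectors to short results needs two output vectors, produced by
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR, or by the even/odd
   pair when only a reduction consumes the result.  For a char -> int
   conversion one step is not enough: the loop above then records a short
   vector type in INTERM_TYPES and sets *MULTI_STEP_CVT to 1, so the caller
   unpacks twice, first to short and then to int.  */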
6549 /* Function supportable_narrowing_operation
6551 Check whether an operation represented by the code CODE is a
6552 narrowing operation that is supported by the target platform in
6553 vector form (i.e., when operating on arguments of type VECTYPE_IN
6554 and producing a result of type VECTYPE_OUT).
6556 Narrowing operations we currently support are NOP (CONVERT) and
6557 FIX_TRUNC. This function checks if these operations are supported by
6558 the target platform directly via vector tree-codes.
6560 Output:
6561 - CODE1 is the code of a vector operation to be used when
6562 vectorizing the operation, if available.
6563 - MULTI_STEP_CVT determines the number of required intermediate steps in
6564 case of multi-step conversion (like int->short->char - in that case
6565 MULTI_STEP_CVT will be 1).
6566 - INTERM_TYPES contains the intermediate type required to perform the
6567 narrowing operation (short in the above example). */
6569 bool
6570 supportable_narrowing_operation (enum tree_code code,
6571 tree vectype_out, tree vectype_in,
6572 enum tree_code *code1, int *multi_step_cvt,
6573 vec<tree> *interm_types)
6575 enum machine_mode vec_mode;
6576 enum insn_code icode1;
6577 optab optab1, interm_optab;
6578 tree vectype = vectype_in;
6579 tree narrow_vectype = vectype_out;
6580 enum tree_code c1;
6581 tree intermediate_type;
6582 enum machine_mode intermediate_mode, prev_mode;
6583 int i;
6584 bool uns;
6586 *multi_step_cvt = 0;
6587 switch (code)
6589 CASE_CONVERT:
6590 c1 = VEC_PACK_TRUNC_EXPR;
6591 break;
6593 case FIX_TRUNC_EXPR:
6594 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6595 break;
6597 case FLOAT_EXPR:
6598 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6599 tree code and optabs used for computing the operation. */
6600 return false;
6602 default:
6603 gcc_unreachable ();
6606 if (code == FIX_TRUNC_EXPR)
6607 /* The signedness is determined from the output operand. */
6608 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6609 else
6610 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6612 if (!optab1)
6613 return false;
6615 vec_mode = TYPE_MODE (vectype);
6616 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6617 return false;
6619 *code1 = c1;
6621 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6622 return true;
6624 /* Check if it's a multi-step conversion that can be done using intermediate
6625 types. */
6626 prev_mode = vec_mode;
6627 if (code == FIX_TRUNC_EXPR)
6628 uns = TYPE_UNSIGNED (vectype_out);
6629 else
6630 uns = TYPE_UNSIGNED (vectype);
6632 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6633 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6634 costly than signed. */
6635 if (code == FIX_TRUNC_EXPR && uns)
6637 enum insn_code icode2;
6639 intermediate_type
6640 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6641 interm_optab
6642 = optab_for_tree_code (c1, intermediate_type, optab_default);
6643 if (interm_optab != unknown_optab
6644 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6645 && insn_data[icode1].operand[0].mode
6646 == insn_data[icode2].operand[0].mode)
6648 uns = false;
6649 optab1 = interm_optab;
6650 icode1 = icode2;
6654 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6655 intermediate steps in the demotion sequence. We try
6656 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6657 interm_types->create (MAX_INTERM_CVT_STEPS);
6658 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6660 intermediate_mode = insn_data[icode1].operand[0].mode;
6661 intermediate_type
6662 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6663 interm_optab
6664 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6665 optab_default);
6666 if (!interm_optab
6667 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6668 || insn_data[icode1].operand[0].mode != intermediate_mode
6669 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6670 == CODE_FOR_nothing))
6671 break;
6673 interm_types->quick_push (intermediate_type);
6674 (*multi_step_cvt)++;
6676 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6677 return true;
6679 prev_mode = intermediate_mode;
6680 optab1 = interm_optab;
6683 interm_types->release ();
6684 return false;
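/* Worked example (illustrative, target-dependent): narrowing int results
   to char usually cannot be done with a single VEC_PACK_TRUNC_EXPR, so the
   loop above records a short vector type in INTERM_TYPES and sets
   *MULTI_STEP_CVT to 1; the caller then packs twice, two int vectors into
   one short vector and two short vectors into one char vector.  For an
   unsigned multi-step FIX_TRUNC_EXPR the code prefers a signed
   float-to-integer conversion when the target provides one with the same
   result mode, since the unsigned variant is often more costly.  */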