Merge trunk version 209848 into gupc branch.
[official-gcc.git] / gcc / tree-vect-patterns.c
blob365cf016213d09a5a8901084f19cd0346ed0feb6
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "stor-layout.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-ssa-alias.h"
31 #include "internal-fn.h"
32 #include "tree-eh.h"
33 #include "gimple-expr.h"
34 #include "is-a.h"
35 #include "gimple.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
43 #include "cfgloop.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "params.h"
47 #include "tree-data-ref.h"
48 #include "tree-vectorizer.h"
49 #include "recog.h" /* FIXME: for insn_data */
50 #include "diagnostic-core.h"
51 #include "dumpfile.h"
/* Pattern recognition functions.  Each recognizer inspects the statement(s)
   passed in its vec<gimple> argument and, on success, returns a new pattern
   statement that replaces the recognized idiom, filling in the vector type
   of the pattern's inputs (TYPE_IN) and of its result (TYPE_OUT).  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);

/* Table of all pattern recognizers.  They are tried in this order, so
   the ordering matters when several patterns could match the same
   statement.  The table length must equal NUM_PATTERNS.  */
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
86 static inline void
87 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
89 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
90 stmt);
93 static inline void
94 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
96 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
97 append_pattern_def_seq (stmt_info, stmt);
100 /* Check whether STMT2 is in the same loop or basic block as STMT1.
101 Which of the two applies depends on whether we're currently doing
102 loop-based or basic-block-based vectorization, as determined by
103 the vinfo_for_stmt for STMT1 (which must be defined).
105 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
106 to be defined as well. */
108 static bool
109 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
111 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
113 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
115 if (!gimple_bb (stmt2))
116 return false;
118 if (loop_vinfo)
120 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
121 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
122 return false;
124 else
126 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
127 || gimple_code (stmt2) == GIMPLE_PHI)
128 return false;
131 gcc_assert (vinfo_for_stmt (stmt2));
132 return true;
135 /* If the LHS of DEF_STMT has a single use, and that statement is
136 in the same loop or basic block, return it. */
138 static gimple
139 vect_single_imm_use (gimple def_stmt)
141 tree lhs = gimple_assign_lhs (def_stmt);
142 use_operand_p use_p;
143 gimple use_stmt;
145 if (!single_imm_use (lhs, &use_p, &use_stmt))
146 return NULL;
148 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
149 return NULL;
151 return use_stmt;
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.

   On success, *ORIG_TYPE is the type of the conversion's source operand,
   *DEF_STMT is the conversion statement, and *PROMOTION is true for a
   promotion (NAME at least twice as wide as name0) and false for a
   demotion (name0 at least twice as wide as NAME).  Conversions between
   types closer than a factor of two in precision are rejected.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  /* This also fills in *DEF_STMT with NAME's defining statement.  */
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  /* The definition must be a NOP/CONVERT conversion.  */
  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  /* Classify: widening by at least 2x is a promotion, narrowing by at
     least 2x a demotion; anything in between is not interesting.  */
  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  /* The conversion's source must itself be a vectorizable operand.  */
  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.
   The temporary is named "patt" for easy identification in dumps.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger. This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern. In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results. It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  /* Dot-product is a loop reduction; not applicable to BB SLP.  */
  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      /* Peel off the optional (TYPE2) conversion S6, if present.  */
      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      /* Inherit any helper stmts created for the widen-mult pattern.  */
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      /* Both multiplication operands must be promotions (S3 and S4).  */
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      /* TYPE1 must be exactly twice the width of 'type'.  */
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  On success *OPRND receives the (possibly
   new) narrow operand, *HALF_TYPE may be widened to the intermediate type,
   and DEF_STMT may be pushed onto STMTS so the caller replaces it too.  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
   		               vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	    	!= 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      /* Remember the conversion so a later call can reuse it.  */
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T,  prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  It must be defined by a promotion (S3).  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                         &promotion)
      || !promotion)
     return NULL;
  /* Check argument 1.  Either a promotion (S4), or a constant.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		 			    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree* oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      /* Build the widening conversion; it is appended to the pattern
	 def sequence further below (via NEW_STMT).  */
      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
					       old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      /* Fold S6 into the pattern: the sign-changing conversion becomes
	 the stmt being replaced.  */
      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
	= gimple_build_assign_with_ops (NOP_EXPR,
					vect_recog_temp_ssa_var (type, NULL),
					gimple_assign_lhs (pattern_stmt),
					NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
   return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      /* The exponent must be a compile-time constant.  */
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  Handles both integer exponent 2 (powi) and real
     exponent 2.0 (pow).  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  Only valid for a real exponent of 0.5, and only
     if the target can vectorize the sqrt call.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'. This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results. It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  /* Widen-sum is a loop reduction; not applicable to BB SLP.  */
  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
1023 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1025 Input:
1026 STMT - a statement to check.
1027 DEF - we support operations with two operands, one of which is constant.
1028 The other operand can be defined by a demotion operation, or by a
1029 previous statement in a sequence of over-promoted operations. In the
1030 later case DEF is used to replace that operand. (It is defined by a
1031 pattern statement we created for the previous statement in the
1032 sequence).
1034 Input/output:
1035 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1036 NULL, it's the type of DEF.
1037 STMTS - additional pattern statements. If a pattern statement (type
1038 conversion) is created in this function, its original statement is
1039 added to STMTS.
1041 Output:
1042 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1043 operands to use in the new pattern statement for STMT (will be created
1044 in vect_recog_over_widening_pattern ()).
1045 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1046 statements for STMT: the first one is a type promotion and the second
1047 one is the operation itself. We return the type promotion statement
1048 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1049 the second pattern statement. */
1051 static bool
1052 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1053 tree *op0, tree *op1, gimple *new_def_stmt,
1054 vec<gimple> *stmts)
1056 enum tree_code code;
1057 tree const_oprnd, oprnd;
1058 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1059 gimple def_stmt, new_stmt;
1060 bool first = false;
1061 bool promotion;
/* Clear the outputs; they are only meaningful when we return true.  */
1063 *op0 = NULL_TREE;
1064 *op1 = NULL_TREE;
1065 *new_def_stmt = NULL;
1067 if (!is_gimple_assign (stmt))
1068 return false;
/* Only shifts and bitwise logical operations are handled.  */
1070 code = gimple_assign_rhs_code (stmt);
1071 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1072 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1073 return false;
1075 oprnd = gimple_assign_rhs1 (stmt);
1076 const_oprnd = gimple_assign_rhs2 (stmt);
1077 type = gimple_expr_type (stmt);
/* As documented above, one operand must be an SSA name and the other a
   constant.  */
1079 if (TREE_CODE (oprnd) != SSA_NAME
1080 || TREE_CODE (const_oprnd) != INTEGER_CST)
1081 return false;
1083 /* If oprnd has other uses besides that in stmt we cannot mark it
1084 as being part of a pattern only. */
1085 if (!has_single_use (oprnd))
1086 return false;
1088 /* If we are in the middle of a sequence, we use DEF from a previous
1089 statement. Otherwise, OPRND has to be a result of type promotion. */
1090 if (*new_type)
1092 half_type = *new_type;
1093 oprnd = def;
1095 else
1097 first = true;
1098 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1099 &promotion)
1100 || !promotion
1101 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1102 return false;
1105 /* Can we perform the operation on a smaller type? */
1106 switch (code)
1108 case BIT_IOR_EXPR:
1109 case BIT_XOR_EXPR:
1110 case BIT_AND_EXPR:
1111 if (!int_fits_type_p (const_oprnd, half_type))
1113 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1114 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1115 return false;
1117 interm_type = build_nonstandard_integer_type (
1118 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1119 if (!int_fits_type_p (const_oprnd, interm_type))
1120 return false;
1123 break;
1125 case LSHIFT_EXPR:
1126 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1127 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1128 return false;
1130 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1131 (e.g., if the original value was char, the shift amount is at most 8
1132 if we want to use short). */
1133 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1134 return false;
1136 interm_type = build_nonstandard_integer_type (
1137 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1139 if (!vect_supportable_shift (code, interm_type))
1140 return false;
1142 break;
1144 case RSHIFT_EXPR:
1145 if (vect_supportable_shift (code, half_type))
1146 break;
1148 /* Try intermediate type - HALF_TYPE is not supported. */
1149 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1150 return false;
1152 interm_type = build_nonstandard_integer_type (
1153 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1155 if (!vect_supportable_shift (code, interm_type))
1156 return false;
1158 break;
1160 default:
1161 gcc_unreachable ();
1164 /* There are four possible cases:
1165 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1166 the first statement in the sequence)
1167 a. The original, HALF_TYPE, is not enough - we replace the promotion
1168 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1169 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1170 promotion.
1171 2. OPRND is defined by a pattern statement we created.
1172 a. Its type is not sufficient for the operation, we create a new stmt:
1173 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1174 this statement in NEW_DEF_STMT, and it is later put in
1175 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1176 b. OPRND is good to use in the new statement. */
1177 if (first)
1179 if (interm_type)
1181 /* Replace the original type conversion HALF_TYPE->TYPE with
1182 HALF_TYPE->INTERM_TYPE. */
1183 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1185 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1186 /* Check if the already created pattern stmt is what we need. */
1187 if (!is_gimple_assign (new_stmt)
1188 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1189 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1190 return false;
1192 stmts->safe_push (def_stmt);
1193 oprnd = gimple_assign_lhs (new_stmt);
1195 else
1197 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1198 oprnd = gimple_assign_rhs1 (def_stmt);
1199 new_oprnd = make_ssa_name (interm_type, NULL);
1200 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1201 oprnd, NULL_TREE);
1202 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1203 stmts->safe_push (def_stmt);
1204 oprnd = new_oprnd;
1207 else
1209 /* Retrieve the operand before the type promotion. */
1210 oprnd = gimple_assign_rhs1 (def_stmt);
1213 else
1215 if (interm_type)
1217 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1218 new_oprnd = make_ssa_name (interm_type, NULL);
1219 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1220 oprnd, NULL_TREE);
1221 oprnd = new_oprnd;
1222 *new_def_stmt = new_stmt;
1225 /* Otherwise, OPRND is already set. */
/* Report the smaller type the operation will actually be carried out in.  */
1228 if (interm_type)
1229 *new_type = interm_type;
1230 else
1231 *new_type = half_type;
1233 *op0 = oprnd;
1234 *op1 = fold_convert (*new_type, const_oprnd);
1236 return true;
1240 /* Try to find a statement or a sequence of statements that can be performed
1241 on a smaller type:
1243 type x_t;
1244 TYPE x_T, res0_T, res1_T;
1245 loop:
1246 S1 x_t = *p;
1247 S2 x_T = (TYPE) x_t;
1248 S3 res0_T = op (x_T, C0);
1249 S4 res1_T = op (res0_T, C1);
1250 S5 ... = () res1_T; - type demotion
1252 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1253 constants.
1254 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1255 be 'type' or some intermediate type. For now, we expect S5 to be a type
1256 demotion operation. We also check that S3 and S4 have only one use. */
1258 static gimple
1259 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1260 tree *type_in, tree *type_out)
1262 gimple stmt = stmts->pop ();
1263 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1264 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1265 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1266 bool first;
1267 tree type = NULL;
1269 first = true;
/* Walk forward through the single-use chain, creating a narrower pattern
   statement for each statement that fits a smaller type.  */
1270 while (1)
1272 if (!vinfo_for_stmt (stmt)
1273 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1274 return NULL;
1276 new_def_stmt = NULL;
1277 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1278 &op0, &op1, &new_def_stmt,
1279 stmts))
1281 if (first)
1282 return NULL;
1283 else
1284 break;
1287 /* STMT can be performed on a smaller type. Check its uses. */
1288 use_stmt = vect_single_imm_use (stmt);
1289 if (!use_stmt || !is_gimple_assign (use_stmt))
1290 return NULL;
1292 /* Create pattern statement for STMT. */
1293 vectype = get_vectype_for_scalar_type (new_type);
1294 if (!vectype)
1295 return NULL;
1297 /* We want to collect all the statements for which we create pattern
1298 statements, except for the case when the last statement in the
1299 sequence doesn't have a corresponding pattern statement. In such
1300 case we associate the last pattern statement with the last statement
1301 in the sequence. Therefore, we only add the original statement to
1302 the list if we know that it is not the last. */
1303 if (prev_stmt)
1304 stmts->safe_push (prev_stmt);
1306 var = vect_recog_temp_ssa_var (new_type, NULL);
1307 pattern_stmt
1308 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1309 op0, op1);
1310 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1311 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1313 if (dump_enabled_p ())
1315 dump_printf_loc (MSG_NOTE, vect_location,
1316 "created pattern stmt: ");
1317 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1318 dump_printf (MSG_NOTE, "\n");
/* Advance to the (single) user and iterate.  */
1321 type = gimple_expr_type (stmt);
1322 prev_stmt = stmt;
1323 stmt = use_stmt;
1325 first = false;
1328 /* We got a sequence. We expect it to end with a type demotion operation.
1329 Otherwise, we quit (for now). There are three possible cases: the
1330 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1331 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1332 NEW_TYPE differs (we create a new conversion statement). */
1333 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1335 use_lhs = gimple_assign_lhs (use_stmt);
1336 use_type = TREE_TYPE (use_lhs);
1337 /* Support only type demotion or signedness change. */
1338 if (!INTEGRAL_TYPE_P (use_type)
1339 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1340 return NULL;
1342 /* Check that NEW_TYPE is not bigger than the conversion result. */
1343 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1344 return NULL;
1346 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1347 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1349 /* Create NEW_TYPE->USE_TYPE conversion. */
1350 new_oprnd = make_ssa_name (use_type, NULL);
1351 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1352 var, NULL_TREE);
1353 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1355 *type_in = get_vectype_for_scalar_type (new_type);
1356 *type_out = get_vectype_for_scalar_type (use_type);
1358 /* We created a pattern statement for the last statement in the
1359 sequence, so we don't need to associate it with the pattern
1360 statement created for PREV_STMT. Therefore, we add PREV_STMT
1361 to the list in order to mark it later in vect_pattern_recog_1. */
1362 if (prev_stmt)
1363 stmts->safe_push (prev_stmt);
1365 else
1367 if (prev_stmt)
1368 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1369 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1371 *type_in = vectype;
1372 *type_out = NULL_TREE;
1375 stmts->safe_push (use_stmt);
1377 else
1378 /* TODO: support general case, create a conversion to the correct type. */
1379 return NULL;
1381 /* Pattern detected. */
1382 if (dump_enabled_p ())
1384 dump_printf_loc (MSG_NOTE, vect_location,
1385 "vect_recog_over_widening_pattern: detected: ");
1386 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1387 dump_printf (MSG_NOTE, "\n");
1390 return pattern_stmt;
1393 /* Detect widening shift pattern:
1395 type a_t;
1396 TYPE a_T, res_T;
1398 S1 a_t = ;
1399 S2 a_T = (TYPE) a_t;
1400 S3 res_T = a_T << CONST;
1402 where type 'TYPE' is at least double the size of type 'type'.
1404 Also detect cases where the shift result is immediately converted
1405 to another type 'result_type' that is no larger in size than 'TYPE'.
1406 In those cases we perform a widen-shift that directly results in
1407 'result_type', to avoid a possible over-widening situation:
1409 type a_t;
1410 TYPE a_T, res_T;
1411 result_type res_result;
1413 S1 a_t = ;
1414 S2 a_T = (TYPE) a_t;
1415 S3 res_T = a_T << CONST;
1416 S4 res_result = (result_type) res_T;
1417 '--> res_result' = a_t w<< CONST;
1419 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1420 create an additional pattern stmt for S2 to create a variable of an
1421 intermediate type, and perform widen-shift on the intermediate type:
1423 type a_t;
1424 interm_type a_it;
1425 TYPE a_T, res_T, res_T';
1427 S1 a_t = ;
1428 S2 a_T = (TYPE) a_t;
1429 '--> a_it = (interm_type) a_t;
1430 S3 res_T = a_T << CONST;
1431 '--> res_T' = a_it <<* CONST;
1433 Input/Output:
1435 * STMTS: Contains a stmt from which the pattern search begins.
1436 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1437 in STMTS. When an intermediate type is used and a pattern statement is
1438 created for S2, we also put S2 here (before S3).
1440 Output:
1442 * TYPE_IN: The type of the input arguments to the pattern.
1444 * TYPE_OUT: The type of the output of this pattern.
1446 * Return value: A new stmt that will be used to replace the sequence of
1447 stmts that constitute the pattern. In this case it will be:
1448 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1450 static gimple
1451 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1452 tree *type_in, tree *type_out)
1454 gimple last_stmt = stmts->pop ();
1455 gimple def_stmt0;
1456 tree oprnd0, oprnd1;
1457 tree type, half_type0;
1458 gimple pattern_stmt;
1459 tree vectype, vectype_out = NULL_TREE;
1460 tree var;
1461 enum tree_code dummy_code;
1462 int dummy_int;
1463 vec<tree> dummy_vec;
1464 gimple use_stmt;
1465 bool promotion;
1467 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1468 return NULL;
1470 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1471 return NULL;
/* Only left shifts by a constant are candidates (S3 in the header
   comment).  */
1473 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1474 return NULL;
1476 oprnd0 = gimple_assign_rhs1 (last_stmt);
1477 oprnd1 = gimple_assign_rhs2 (last_stmt);
1478 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1479 return NULL;
1481 /* Check operand 0: it has to be defined by a type promotion. */
1482 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1483 &promotion)
1484 || !promotion)
1485 return NULL;
1487 /* Check operand 1: has to be positive. We check that it fits the type
1488 in vect_handle_widen_op_by_const (). */
1489 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1490 return NULL;
/* Use the narrow value that feeds the promotion (rhs of S2).  */
1492 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1493 type = gimple_expr_type (last_stmt);
1495 /* Check for subsequent conversion to another type. */
1496 use_stmt = vect_single_imm_use (last_stmt);
1497 if (use_stmt && is_gimple_assign (use_stmt)
1498 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1499 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1501 tree use_lhs = gimple_assign_lhs (use_stmt);
1502 tree use_type = TREE_TYPE (use_lhs);
1504 if (INTEGRAL_TYPE_P (use_type)
1505 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1507 last_stmt = use_stmt;
1508 type = use_type;
1512 /* Check if this is a widening operation. */
1513 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1514 &oprnd0, stmts,
1515 type, &half_type0, def_stmt0))
1516 return NULL;
1518 /* Pattern detected. */
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location,
1521 "vect_recog_widen_shift_pattern: detected:\n");
1523 /* Check target support. */
1524 vectype = get_vectype_for_scalar_type (half_type0);
1525 vectype_out = get_vectype_for_scalar_type (type);
1527 if (!vectype
1528 || !vectype_out
1529 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1530 vectype_out, vectype,
1531 &dummy_code, &dummy_code,
1532 &dummy_int, &dummy_vec))
1533 return NULL;
1535 *type_in = vectype;
1536 *type_out = vectype_out;
1538 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1539 var = vect_recog_temp_ssa_var (type, NULL);
1540 pattern_stmt =
1541 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1543 if (dump_enabled_p ())
1544 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1546 stmts->safe_push (last_stmt);
1547 return pattern_stmt;
1550 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1552 type a_t, b_t, c_t;
1554 S0 a_t = b_t r<< c_t;
1556 Input/Output:
1558 * STMTS: Contains a stmt from which the pattern search begins,
1559 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1560 with a sequence:
1562 S1 d_t = -c_t;
1563 S2 e_t = d_t & (B - 1);
1564 S3 f_t = b_t << c_t;
1565 S4 g_t = b_t >> e_t;
1566 S0 a_t = f_t | g_t;
1568 where B is element bitsize of type.
1570 Output:
1572 * TYPE_IN: The type of the input arguments to the pattern.
1574 * TYPE_OUT: The type of the output of this pattern.
1576 * Return value: A new stmt that will be used to replace the rotate
1577 S0 stmt. */
1579 static gimple
1580 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1582 gimple last_stmt = stmts->pop ();
1583 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1584 gimple pattern_stmt, def_stmt;
1585 enum tree_code rhs_code;
1586 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1588 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1589 enum vect_def_type dt;
1590 optab optab1, optab2;
1591 edge ext_def = NULL;
1593 if (!is_gimple_assign (last_stmt))
1594 return NULL;
1596 rhs_code = gimple_assign_rhs_code (last_stmt);
1597 switch (rhs_code)
1599 case LROTATE_EXPR:
1600 case RROTATE_EXPR:
1601 break;
1602 default:
1603 return NULL;
1606 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1607 return NULL;
1609 lhs = gimple_assign_lhs (last_stmt);
1610 oprnd0 = gimple_assign_rhs1 (last_stmt);
1611 type = TREE_TYPE (oprnd0);
1612 oprnd1 = gimple_assign_rhs2 (last_stmt);
/* Only unsigned integral rotates are handled (the shift/or expansion is
   not valid for signed types).  */
1613 if (TREE_CODE (oprnd0) != SSA_NAME
1614 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1615 || !INTEGRAL_TYPE_P (type)
1616 || !TYPE_UNSIGNED (type))
1617 return NULL;
1619 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1620 &def, &dt))
1621 return NULL;
1623 if (dt != vect_internal_def
1624 && dt != vect_constant_def
1625 && dt != vect_external_def)
1626 return NULL;
1628 vectype = get_vectype_for_scalar_type (type);
1629 if (vectype == NULL_TREE)
1630 return NULL;
1632 /* If vector/vector or vector/scalar rotate is supported by the target,
1633 don't do anything here. */
1634 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1635 if (optab1
1636 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1637 return NULL;
1639 if (bb_vinfo != NULL || dt != vect_internal_def)
1641 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1642 if (optab2
1643 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1644 return NULL;
1647 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1648 don't do anything here either. */
1649 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1650 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1651 if (!optab1
1652 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1653 || !optab2
1654 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1656 if (bb_vinfo == NULL && dt == vect_internal_def)
1657 return NULL;
1658 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1659 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1660 if (!optab1
1661 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1662 || !optab2
1663 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1664 return NULL;
1667 *type_in = vectype;
1668 *type_out = vectype;
1669 if (*type_in == NULL_TREE)
1670 return NULL;
/* For a loop-external shift count, prepare to emit the helper statements
   on the loop preheader edge, provided they would dominate all uses.  */
1672 if (dt == vect_external_def
1673 && TREE_CODE (oprnd1) == SSA_NAME
1674 && loop_vinfo)
1676 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1677 ext_def = loop_preheader_edge (loop);
1678 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1680 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1681 if (bb == NULL
1682 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1683 ext_def = NULL;
/* Try to reuse a same-mode value for the shift count before emitting a
   conversion.  */
1687 def = NULL_TREE;
1688 if (TREE_CODE (oprnd1) == INTEGER_CST
1689 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1690 def = oprnd1;
1691 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1693 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1694 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1695 && TYPE_PRECISION (TREE_TYPE (rhs1))
1696 == TYPE_PRECISION (type))
1697 def = rhs1;
1700 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1701 if (def == NULL_TREE)
1703 def = vect_recog_temp_ssa_var (type, NULL);
1704 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1705 NULL_TREE);
1706 if (ext_def)
1708 basic_block new_bb
1709 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1710 gcc_assert (!new_bb);
1712 else
1713 append_pattern_def_seq (stmt_vinfo, def_stmt);
1715 stype = TREE_TYPE (def);
/* Compute DEF2, the complementary shift count (B - DEF, masked to B-1
   bits for the variable case; see S1/S2 in the header comment).  */
1717 if (TREE_CODE (def) == INTEGER_CST)
1719 if (!tree_fits_uhwi_p (def)
1720 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1721 || integer_zerop (def))
1722 return NULL;
1723 def2 = build_int_cst (stype,
1724 GET_MODE_PRECISION (TYPE_MODE (type))
1725 - tree_to_uhwi (def));
1727 else
1729 tree vecstype = get_vectype_for_scalar_type (stype);
1730 stmt_vec_info def_stmt_vinfo;
1732 if (vecstype == NULL_TREE)
1733 return NULL;
1734 def2 = vect_recog_temp_ssa_var (stype, NULL);
1735 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1736 NULL_TREE);
1737 if (ext_def)
1739 basic_block new_bb
1740 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1741 gcc_assert (!new_bb);
1743 else
1745 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1746 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1747 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1748 append_pattern_def_seq (stmt_vinfo, def_stmt);
1751 def2 = vect_recog_temp_ssa_var (stype, NULL);
1752 tree mask
1753 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1754 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1755 gimple_assign_lhs (def_stmt),
1756 mask);
1757 if (ext_def)
1759 basic_block new_bb
1760 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1761 gcc_assert (!new_bb);
1763 else
1765 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1766 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1767 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1768 append_pattern_def_seq (stmt_vinfo, def_stmt);
/* Emit the two opposite shifts whose bitwise OR implements the rotate.  */
1772 var1 = vect_recog_temp_ssa_var (type, NULL);
1773 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1774 ? LSHIFT_EXPR : RSHIFT_EXPR,
1775 var1, oprnd0, def);
1776 append_pattern_def_seq (stmt_vinfo, def_stmt);
1778 var2 = vect_recog_temp_ssa_var (type, NULL);
1779 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1780 ? RSHIFT_EXPR : LSHIFT_EXPR,
1781 var2, oprnd0, def2);
1782 append_pattern_def_seq (stmt_vinfo, def_stmt);
1784 /* Pattern detected. */
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_NOTE, vect_location,
1787 "vect_recog_rotate_pattern: detected:\n");
1789 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1790 var = vect_recog_temp_ssa_var (type, NULL);
1791 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
1793 if (dump_enabled_p ())
1794 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1796 stmts->safe_push (last_stmt);
1797 return pattern_stmt;
1800 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1801 vectorized:
1803 type a_t;
1804 TYPE b_T, res_T;
1806 S1 a_t = ;
1807 S2 b_T = ;
1808 S3 res_T = b_T op a_t;
1810 where type 'TYPE' is a type with different size than 'type',
1811 and op is <<, >> or rotate.
1813 Also detect cases:
1815 type a_t;
1816 TYPE b_T, c_T, res_T;
1818 S0 c_T = ;
1819 S1 a_t = (type) c_T;
1820 S2 b_T = ;
1821 S3 res_T = b_T op a_t;
1823 Input/Output:
1825 * STMTS: Contains a stmt from which the pattern search begins,
1826 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1827 with a shift/rotate which has same type on both operands, in the
1828 second case just b_T op c_T, in the first case with added cast
1829 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1831 Output:
1833 * TYPE_IN: The type of the input arguments to the pattern.
1835 * TYPE_OUT: The type of the output of this pattern.
1837 * Return value: A new stmt that will be used to replace the shift/rotate
1838 S3 stmt. */
1840 static gimple
1841 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1842 tree *type_in, tree *type_out)
1844 gimple last_stmt = stmts->pop ();
1845 tree oprnd0, oprnd1, lhs, var;
1846 gimple pattern_stmt, def_stmt;
1847 enum tree_code rhs_code;
1848 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1849 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1850 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1851 enum vect_def_type dt;
1852 tree def;
1854 if (!is_gimple_assign (last_stmt))
1855 return NULL;
1857 rhs_code = gimple_assign_rhs_code (last_stmt);
1858 switch (rhs_code)
1860 case LSHIFT_EXPR:
1861 case RSHIFT_EXPR:
1862 case LROTATE_EXPR:
1863 case RROTATE_EXPR:
1864 break;
1865 default:
1866 return NULL;
1869 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1870 return NULL;
/* The pattern applies only when the shift count's mode differs from the
   shifted operand's mode (same-mode shifts need no help).  */
1872 lhs = gimple_assign_lhs (last_stmt);
1873 oprnd0 = gimple_assign_rhs1 (last_stmt);
1874 oprnd1 = gimple_assign_rhs2 (last_stmt);
1875 if (TREE_CODE (oprnd0) != SSA_NAME
1876 || TREE_CODE (oprnd1) != SSA_NAME
1877 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1878 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1879 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1880 || TYPE_PRECISION (TREE_TYPE (lhs))
1881 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1882 return NULL;
1884 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1885 &def, &dt))
1886 return NULL;
1888 if (dt != vect_internal_def)
1889 return NULL;
1891 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1892 *type_out = *type_in;
1893 if (*type_in == NULL_TREE)
1894 return NULL;
/* If the count is a cast from a value of the shifted operand's mode and
   precision, use that value directly; otherwise cast the count.  */
1896 def = NULL_TREE;
1897 if (gimple_assign_cast_p (def_stmt))
1899 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1900 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1901 && TYPE_PRECISION (TREE_TYPE (rhs1))
1902 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1903 def = rhs1;
1906 if (def == NULL_TREE)
1908 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1909 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1910 NULL_TREE);
1911 new_pattern_def_seq (stmt_vinfo, def_stmt);
1914 /* Pattern detected. */
1915 if (dump_enabled_p ())
1916 dump_printf_loc (MSG_NOTE, vect_location,
1917 "vect_recog_vector_vector_shift_pattern: detected:\n");
1919 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1920 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1921 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1923 if (dump_enabled_p ())
1924 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1926 stmts->safe_push (last_stmt);
1927 return pattern_stmt;
1930 /* Detect a signed division by a constant that wouldn't be
1931 otherwise vectorized:
1933 type a_t, b_t;
1935 S1 a_t = b_t / N;
1937 where type 'type' is an integral type and N is a constant.
1939 Similarly handle modulo by a constant:
1941 S4 a_t = b_t % N;
1943 Input/Output:
1945 * STMTS: Contains a stmt from which the pattern search begins,
1946 i.e. the division stmt. S1 is replaced by if N is a power
1947 of two constant and type is signed:
1948 S3 y_t = b_t < 0 ? N - 1 : 0;
1949 S2 x_t = b_t + y_t;
1950 S1' a_t = x_t >> log2 (N);
1952 S4 is replaced if N is a power of two constant and
1953 type is signed by (where *_T temporaries have unsigned type):
1954 S9 y_T = b_t < 0 ? -1U : 0U;
1955 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1956 S7 z_t = (type) z_T;
1957 S6 w_t = b_t + z_t;
1958 S5 x_t = w_t & (N - 1);
1959 S4' a_t = x_t - z_t;
1961 Output:
1963 * TYPE_IN: The type of the input arguments to the pattern.
1965 * TYPE_OUT: The type of the output of this pattern.
1967 * Return value: A new stmt that will be used to replace the division
1968 S1 or modulo S4 stmt. */
1970 static gimple
1971 vect_recog_divmod_pattern (vec<gimple> *stmts,
1972 tree *type_in, tree *type_out)
1974 gimple last_stmt = stmts->pop ();
1975 tree oprnd0, oprnd1, vectype, itype, cond;
1976 gimple pattern_stmt, def_stmt;
1977 enum tree_code rhs_code;
1978 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1979 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1980 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1981 optab optab;
1982 tree q;
1983 int dummy_int, prec;
1984 stmt_vec_info def_stmt_vinfo;
1986 if (!is_gimple_assign (last_stmt))
1987 return NULL;
1989 rhs_code = gimple_assign_rhs_code (last_stmt);
1990 switch (rhs_code)
1992 case TRUNC_DIV_EXPR:
1993 case TRUNC_MOD_EXPR:
1994 break;
1995 default:
1996 return NULL;
1999 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2000 return NULL;
2002 oprnd0 = gimple_assign_rhs1 (last_stmt);
2003 oprnd1 = gimple_assign_rhs2 (last_stmt);
2004 itype = TREE_TYPE (oprnd0);
2005 if (TREE_CODE (oprnd0) != SSA_NAME
2006 || TREE_CODE (oprnd1) != INTEGER_CST
2007 || TREE_CODE (itype) != INTEGER_TYPE
2008 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2009 return NULL;
2011 vectype = get_vectype_for_scalar_type (itype);
2012 if (vectype == NULL_TREE)
2013 return NULL;
2015 /* If the target can handle vectorized division or modulo natively,
2016 don't attempt to optimize this. */
2017 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2018 if (optab != unknown_optab)
2020 enum machine_mode vec_mode = TYPE_MODE (vectype);
2021 int icode = (int) optab_handler (optab, vec_mode);
2022 if (icode != CODE_FOR_nothing)
2023 return NULL;
2026 prec = TYPE_PRECISION (itype);
2027 if (integer_pow2p (oprnd1))
2029 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2030 return NULL;
2032 /* Pattern detected. */
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_NOTE, vect_location,
2035 "vect_recog_divmod_pattern: detected:\n");
2037 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2038 build_int_cst (itype, 0));
2039 if (rhs_code == TRUNC_DIV_EXPR)
2041 tree var = vect_recog_temp_ssa_var (itype, NULL);
2042 tree shift;
2043 def_stmt
2044 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2045 fold_build2 (MINUS_EXPR, itype,
2046 oprnd1,
2047 build_int_cst (itype,
2048 1)),
2049 build_int_cst (itype, 0));
2050 new_pattern_def_seq (stmt_vinfo, def_stmt);
2051 var = vect_recog_temp_ssa_var (itype, NULL);
2052 def_stmt
2053 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2054 gimple_assign_lhs (def_stmt));
2055 append_pattern_def_seq (stmt_vinfo, def_stmt);
2057 shift = build_int_cst (itype, tree_log2 (oprnd1));
2058 pattern_stmt
2059 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2060 vect_recog_temp_ssa_var (itype,
2061 NULL),
2062 var, shift);
2064 else
2066 tree signmask;
2067 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2068 if (compare_tree_int (oprnd1, 2) == 0)
2070 signmask = vect_recog_temp_ssa_var (itype, NULL);
2071 def_stmt
2072 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2073 build_int_cst (itype, 1),
2074 build_int_cst (itype, 0));
2075 append_pattern_def_seq (stmt_vinfo, def_stmt);
2077 else
2079 tree utype
2080 = build_nonstandard_integer_type (prec, 1);
2081 tree vecutype = get_vectype_for_scalar_type (utype);
2082 tree shift
2083 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2084 - tree_log2 (oprnd1));
2085 tree var = vect_recog_temp_ssa_var (utype, NULL);
2087 def_stmt
2088 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2089 build_int_cst (utype, -1),
2090 build_int_cst (utype, 0));
2091 def_stmt_vinfo
2092 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2093 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2094 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2095 append_pattern_def_seq (stmt_vinfo, def_stmt);
2096 var = vect_recog_temp_ssa_var (utype, NULL);
2097 def_stmt
2098 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2099 gimple_assign_lhs (def_stmt),
2100 shift);
2101 def_stmt_vinfo
2102 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2103 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2104 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2105 append_pattern_def_seq (stmt_vinfo, def_stmt);
2106 signmask = vect_recog_temp_ssa_var (itype, NULL);
2107 def_stmt
2108 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2109 NULL_TREE);
2110 append_pattern_def_seq (stmt_vinfo, def_stmt);
2112 def_stmt
2113 = gimple_build_assign_with_ops (PLUS_EXPR,
2114 vect_recog_temp_ssa_var (itype,
2115 NULL),
2116 oprnd0, signmask);
2117 append_pattern_def_seq (stmt_vinfo, def_stmt);
2118 def_stmt
2119 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2120 vect_recog_temp_ssa_var (itype,
2121 NULL),
2122 gimple_assign_lhs (def_stmt),
2123 fold_build2 (MINUS_EXPR, itype,
2124 oprnd1,
2125 build_int_cst (itype,
2126 1)));
2127 append_pattern_def_seq (stmt_vinfo, def_stmt);
2129 pattern_stmt
2130 = gimple_build_assign_with_ops (MINUS_EXPR,
2131 vect_recog_temp_ssa_var (itype,
2132 NULL),
2133 gimple_assign_lhs (def_stmt),
2134 signmask);
2137 if (dump_enabled_p ())
2138 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2141 stmts->safe_push (last_stmt);
2143 *type_in = vectype;
2144 *type_out = vectype;
2145 return pattern_stmt;
2148 if (prec > HOST_BITS_PER_WIDE_INT
2149 || integer_zerop (oprnd1))
2150 return NULL;
2152 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2153 return NULL;
2155 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2157 if (TYPE_UNSIGNED (itype))
2159 unsigned HOST_WIDE_INT mh, ml;
2160 int pre_shift, post_shift;
2161 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2162 & GET_MODE_MASK (TYPE_MODE (itype)));
2163 tree t1, t2, t3, t4;
2165 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2166 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2167 return NULL;
2169 /* Find a suitable multiplier and right shift count
2170 instead of multiplying with D. */
2171 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2173 /* If the suggested multiplier is more than SIZE bits, we can do better
2174 for even divisors, using an initial right shift. */
2175 if (mh != 0 && (d & 1) == 0)
2177 pre_shift = floor_log2 (d & -d);
2178 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2179 &ml, &post_shift, &dummy_int);
2180 gcc_assert (!mh);
2182 else
2183 pre_shift = 0;
2185 if (mh != 0)
2187 if (post_shift - 1 >= prec)
2188 return NULL;
2190 /* t1 = oprnd0 h* ml;
2191 t2 = oprnd0 - t1;
2192 t3 = t2 >> 1;
2193 t4 = t1 + t3;
2194 q = t4 >> (post_shift - 1); */
2195 t1 = vect_recog_temp_ssa_var (itype, NULL);
2196 def_stmt
2197 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2198 build_int_cst (itype, ml));
2199 append_pattern_def_seq (stmt_vinfo, def_stmt);
2201 t2 = vect_recog_temp_ssa_var (itype, NULL);
2202 def_stmt
2203 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2204 append_pattern_def_seq (stmt_vinfo, def_stmt);
2206 t3 = vect_recog_temp_ssa_var (itype, NULL);
2207 def_stmt
2208 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2209 integer_one_node);
2210 append_pattern_def_seq (stmt_vinfo, def_stmt);
2212 t4 = vect_recog_temp_ssa_var (itype, NULL);
2213 def_stmt
2214 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2216 if (post_shift != 1)
2218 append_pattern_def_seq (stmt_vinfo, def_stmt);
2220 q = vect_recog_temp_ssa_var (itype, NULL);
2221 pattern_stmt
2222 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2223 build_int_cst (itype,
2224 post_shift
2225 - 1));
2227 else
2229 q = t4;
2230 pattern_stmt = def_stmt;
2233 else
2235 if (pre_shift >= prec || post_shift >= prec)
2236 return NULL;
2238 /* t1 = oprnd0 >> pre_shift;
2239 t2 = t1 h* ml;
2240 q = t2 >> post_shift; */
2241 if (pre_shift)
2243 t1 = vect_recog_temp_ssa_var (itype, NULL);
2244 def_stmt
2245 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2246 build_int_cst (NULL,
2247 pre_shift));
2248 append_pattern_def_seq (stmt_vinfo, def_stmt);
2250 else
2251 t1 = oprnd0;
2253 t2 = vect_recog_temp_ssa_var (itype, NULL);
2254 def_stmt
2255 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2256 build_int_cst (itype, ml));
2258 if (post_shift)
2260 append_pattern_def_seq (stmt_vinfo, def_stmt);
2262 q = vect_recog_temp_ssa_var (itype, NULL);
2263 def_stmt
2264 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2265 build_int_cst (itype,
2266 post_shift));
2268 else
2269 q = t2;
2271 pattern_stmt = def_stmt;
2274 else
2276 unsigned HOST_WIDE_INT ml;
2277 int post_shift;
2278 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2279 unsigned HOST_WIDE_INT abs_d;
2280 bool add = false;
2281 tree t1, t2, t3, t4;
2283 /* Give up for -1. */
2284 if (d == -1)
2285 return NULL;
2287 /* Since d might be INT_MIN, we have to cast to
2288 unsigned HOST_WIDE_INT before negating to avoid
2289 undefined signed overflow. */
2290 abs_d = (d >= 0
2291 ? (unsigned HOST_WIDE_INT) d
2292 : - (unsigned HOST_WIDE_INT) d);
2294 /* n rem d = n rem -d */
2295 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2297 d = abs_d;
2298 oprnd1 = build_int_cst (itype, abs_d);
2300 else if (HOST_BITS_PER_WIDE_INT >= prec
2301 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2302 /* This case is not handled correctly below. */
2303 return NULL;
2305 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2306 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2308 add = true;
2309 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2311 if (post_shift >= prec)
2312 return NULL;
2314 /* t1 = oprnd0 h* ml; */
2315 t1 = vect_recog_temp_ssa_var (itype, NULL);
2316 def_stmt
2317 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2318 build_int_cst (itype, ml));
2320 if (add)
2322 /* t2 = t1 + oprnd0; */
2323 append_pattern_def_seq (stmt_vinfo, def_stmt);
2324 t2 = vect_recog_temp_ssa_var (itype, NULL);
2325 def_stmt
2326 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2328 else
2329 t2 = t1;
2331 if (post_shift)
2333 /* t3 = t2 >> post_shift; */
2334 append_pattern_def_seq (stmt_vinfo, def_stmt);
2335 t3 = vect_recog_temp_ssa_var (itype, NULL);
2336 def_stmt
2337 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2338 build_int_cst (itype, post_shift));
2340 else
2341 t3 = t2;
2343 double_int oprnd0_min, oprnd0_max;
2344 int msb = 1;
2345 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2347 if (!oprnd0_min.is_negative ())
2348 msb = 0;
2349 else if (oprnd0_max.is_negative ())
2350 msb = -1;
2353 if (msb == 0 && d >= 0)
2355 /* q = t3; */
2356 q = t3;
2357 pattern_stmt = def_stmt;
2359 else
2361 /* t4 = oprnd0 >> (prec - 1);
2362 or if we know from VRP that oprnd0 >= 0
2363 t4 = 0;
2364 or if we know from VRP that oprnd0 < 0
2365 t4 = -1; */
2366 append_pattern_def_seq (stmt_vinfo, def_stmt);
2367 t4 = vect_recog_temp_ssa_var (itype, NULL);
2368 if (msb != 1)
2369 def_stmt
2370 = gimple_build_assign_with_ops (INTEGER_CST,
2371 t4, build_int_cst (itype, msb),
2372 NULL_TREE);
2373 else
2374 def_stmt
2375 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2376 build_int_cst (itype, prec - 1));
2377 append_pattern_def_seq (stmt_vinfo, def_stmt);
2379 /* q = t3 - t4; or q = t4 - t3; */
2380 q = vect_recog_temp_ssa_var (itype, NULL);
2381 pattern_stmt
2382 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2383 d < 0 ? t3 : t4);
2387 if (rhs_code == TRUNC_MOD_EXPR)
2389 tree r, t1;
2391 /* We divided. Now finish by:
2392 t1 = q * oprnd1;
2393 r = oprnd0 - t1; */
2394 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2396 t1 = vect_recog_temp_ssa_var (itype, NULL);
2397 def_stmt
2398 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2399 append_pattern_def_seq (stmt_vinfo, def_stmt);
2401 r = vect_recog_temp_ssa_var (itype, NULL);
2402 pattern_stmt
2403 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2406 /* Pattern detected. */
2407 if (dump_enabled_p ())
2409 dump_printf_loc (MSG_NOTE, vect_location,
2410 "vect_recog_divmod_pattern: detected: ");
2411 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2412 dump_printf (MSG_NOTE, "\n");
2415 stmts->safe_push (last_stmt);
2417 *type_in = vectype;
2418 *type_out = vectype;
2419 return pattern_stmt;
2422 /* Function vect_recog_mixed_size_cond_pattern
2424 Try to find the following pattern:
2426 type x_t, y_t;
2427 TYPE a_T, b_T, c_T;
2428 loop:
2429 S1 a_T = x_t CMP y_t ? b_T : c_T;
2431 where type 'TYPE' is an integral type which has different size
2432 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2433 than 'type', the constants need to fit into an integer type
2434 with the same width as 'type') or results of conversion from 'type'.
2436 Input:
2438 * LAST_STMT: A stmt from which the pattern search begins.
2440 Output:
2442 * TYPE_IN: The type of the input arguments to the pattern.
2444 * TYPE_OUT: The type of the output of this pattern.
2446 * Return value: A new stmt that will be used to replace the pattern.
2447 Additionally a def_stmt is added.
2449 a_it = x_t CMP y_t ? b_it : c_it;
2450 a_T = (TYPE) a_it; */
2452 static gimple
2453 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2454 tree *type_out)
2456 gimple last_stmt = (*stmts)[0];
2457 tree cond_expr, then_clause, else_clause;
2458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2459 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2460 enum machine_mode cmpmode;
2461 gimple pattern_stmt, def_stmt;
2462 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2463 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2464 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2465 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2466 bool promotion;
2467 tree comp_scalar_type;
2469 if (!is_gimple_assign (last_stmt)
2470 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2471 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2472 return NULL;
2474 cond_expr = gimple_assign_rhs1 (last_stmt);
2475 then_clause = gimple_assign_rhs2 (last_stmt);
2476 else_clause = gimple_assign_rhs3 (last_stmt);
2478 if (!COMPARISON_CLASS_P (cond_expr))
2479 return NULL;
2481 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2482 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2483 if (comp_vectype == NULL_TREE)
2484 return NULL;
2486 type = gimple_expr_type (last_stmt);
2487 if (types_compatible_p (type, comp_scalar_type)
2488 || ((TREE_CODE (then_clause) != INTEGER_CST
2489 || TREE_CODE (else_clause) != INTEGER_CST)
2490 && !INTEGRAL_TYPE_P (comp_scalar_type))
2491 || !INTEGRAL_TYPE_P (type))
2492 return NULL;
2494 if ((TREE_CODE (then_clause) != INTEGER_CST
2495 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2496 &def_stmt0, &promotion))
2497 || (TREE_CODE (else_clause) != INTEGER_CST
2498 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2499 &def_stmt1, &promotion)))
2500 return NULL;
2502 if (orig_type0 && orig_type1
2503 && !types_compatible_p (orig_type0, orig_type1))
2504 return NULL;
2506 if (orig_type0)
2508 if (!types_compatible_p (orig_type0, comp_scalar_type))
2509 return NULL;
2510 then_clause = gimple_assign_rhs1 (def_stmt0);
2511 itype = orig_type0;
2514 if (orig_type1)
2516 if (!types_compatible_p (orig_type1, comp_scalar_type))
2517 return NULL;
2518 else_clause = gimple_assign_rhs1 (def_stmt1);
2519 itype = orig_type1;
2522 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2524 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2525 return NULL;
2527 vectype = get_vectype_for_scalar_type (type);
2528 if (vectype == NULL_TREE)
2529 return NULL;
2531 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2532 return NULL;
2534 if (itype == NULL_TREE)
2535 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2536 TYPE_UNSIGNED (type));
2538 if (itype == NULL_TREE
2539 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2540 return NULL;
2542 vecitype = get_vectype_for_scalar_type (itype);
2543 if (vecitype == NULL_TREE)
2544 return NULL;
2546 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2547 return NULL;
2549 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2551 if ((TREE_CODE (then_clause) == INTEGER_CST
2552 && !int_fits_type_p (then_clause, itype))
2553 || (TREE_CODE (else_clause) == INTEGER_CST
2554 && !int_fits_type_p (else_clause, itype)))
2555 return NULL;
2558 def_stmt
2559 = gimple_build_assign_with_ops (COND_EXPR,
2560 vect_recog_temp_ssa_var (itype, NULL),
2561 unshare_expr (cond_expr),
2562 fold_convert (itype, then_clause),
2563 fold_convert (itype, else_clause));
2564 pattern_stmt
2565 = gimple_build_assign_with_ops (NOP_EXPR,
2566 vect_recog_temp_ssa_var (type, NULL),
2567 gimple_assign_lhs (def_stmt), NULL_TREE);
2569 new_pattern_def_seq (stmt_vinfo, def_stmt);
2570 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2571 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2572 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2573 *type_in = vecitype;
2574 *type_out = vectype;
2576 if (dump_enabled_p ())
2577 dump_printf_loc (MSG_NOTE, vect_location,
2578 "vect_recog_mixed_size_cond_pattern: detected:\n");
2580 return pattern_stmt;
2584 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2585 true if bool VAR can be optimized that way. */
2587 static bool
2588 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2590 gimple def_stmt;
2591 enum vect_def_type dt;
2592 tree def, rhs1;
2593 enum tree_code rhs_code;
2595 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2596 &dt))
2597 return false;
2599 if (dt != vect_internal_def)
2600 return false;
2602 if (!is_gimple_assign (def_stmt))
2603 return false;
2605 if (!has_single_use (def))
2606 return false;
2608 rhs1 = gimple_assign_rhs1 (def_stmt);
2609 rhs_code = gimple_assign_rhs_code (def_stmt);
2610 switch (rhs_code)
2612 case SSA_NAME:
2613 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2615 CASE_CONVERT:
2616 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2617 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2618 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2619 return false;
2620 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2622 case BIT_NOT_EXPR:
2623 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2625 case BIT_AND_EXPR:
2626 case BIT_IOR_EXPR:
2627 case BIT_XOR_EXPR:
2628 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2629 return false;
2630 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2631 bb_vinfo);
2633 default:
2634 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2636 tree vecitype, comp_vectype;
2638 /* If the comparison can throw, then is_gimple_condexpr will be
2639 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2640 if (stmt_could_throw_p (def_stmt))
2641 return false;
2643 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2644 if (comp_vectype == NULL_TREE)
2645 return false;
2647 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2649 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2650 tree itype
2651 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2652 vecitype = get_vectype_for_scalar_type (itype);
2653 if (vecitype == NULL_TREE)
2654 return false;
2656 else
2657 vecitype = comp_vectype;
2658 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2660 return false;
2665 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2666 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2667 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2669 static tree
2670 adjust_bool_pattern_cast (tree type, tree var)
2672 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2673 gimple cast_stmt, pattern_stmt;
2675 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2676 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2677 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2678 cast_stmt
2679 = gimple_build_assign_with_ops (NOP_EXPR,
2680 vect_recog_temp_ssa_var (type, NULL),
2681 gimple_assign_lhs (pattern_stmt),
2682 NULL_TREE);
2683 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2684 return gimple_assign_lhs (cast_stmt);
2688 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2689 recursively. VAR is an SSA_NAME that should be transformed from bool
2690 to a wider integer type, OUT_TYPE is the desired final integer type of
2691 the whole pattern, TRUEVAL should be NULL unless optimizing
2692 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2693 in the then_clause, STMTS is where statements with added pattern stmts
2694 should be pushed to. */
2696 static tree
2697 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2698 vec<gimple> *stmts)
2700 gimple stmt = SSA_NAME_DEF_STMT (var);
2701 enum tree_code rhs_code, def_rhs_code;
2702 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2703 location_t loc;
2704 gimple pattern_stmt, def_stmt;
2706 rhs1 = gimple_assign_rhs1 (stmt);
2707 rhs2 = gimple_assign_rhs2 (stmt);
2708 rhs_code = gimple_assign_rhs_code (stmt);
2709 loc = gimple_location (stmt);
2710 switch (rhs_code)
2712 case SSA_NAME:
2713 CASE_CONVERT:
2714 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2715 itype = TREE_TYPE (irhs1);
2716 pattern_stmt
2717 = gimple_build_assign_with_ops (SSA_NAME,
2718 vect_recog_temp_ssa_var (itype, NULL),
2719 irhs1, NULL_TREE);
2720 break;
2722 case BIT_NOT_EXPR:
2723 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2724 itype = TREE_TYPE (irhs1);
2725 pattern_stmt
2726 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2727 vect_recog_temp_ssa_var (itype, NULL),
2728 irhs1, build_int_cst (itype, 1));
2729 break;
2731 case BIT_AND_EXPR:
2732 /* Try to optimize x = y & (a < b ? 1 : 0); into
2733 x = (a < b ? y : 0);
2735 E.g. for:
2736 bool a_b, b_b, c_b;
2737 TYPE d_T;
2739 S1 a_b = x1 CMP1 y1;
2740 S2 b_b = x2 CMP2 y2;
2741 S3 c_b = a_b & b_b;
2742 S4 d_T = (TYPE) c_b;
2744 we would normally emit:
2746 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2747 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2748 S3' c_T = a_T & b_T;
2749 S4' d_T = c_T;
2751 but we can save one stmt by using the
2752 result of one of the COND_EXPRs in the other COND_EXPR and leave
2753 BIT_AND_EXPR stmt out:
2755 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2756 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2757 S4' f_T = c_T;
2759 At least when VEC_COND_EXPR is implemented using masks
2760 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2761 computes the comparison masks and ands it, in one case with
2762 all ones vector, in the other case with a vector register.
2763 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2764 often more expensive. */
2765 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2766 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2767 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2769 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2770 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2771 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2772 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2774 gimple tstmt;
2775 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2776 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2777 tstmt = stmts->pop ();
2778 gcc_assert (tstmt == def_stmt);
2779 stmts->quick_push (stmt);
2780 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2781 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2782 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2783 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2784 return irhs2;
2786 else
2787 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2788 goto and_ior_xor;
2790 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2791 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2792 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2794 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2795 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2796 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2797 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2799 gimple tstmt;
2800 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2801 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2802 tstmt = stmts->pop ();
2803 gcc_assert (tstmt == def_stmt);
2804 stmts->quick_push (stmt);
2805 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2806 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2807 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2808 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2809 return irhs1;
2811 else
2812 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2813 goto and_ior_xor;
2815 /* FALLTHRU */
2816 case BIT_IOR_EXPR:
2817 case BIT_XOR_EXPR:
2818 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2819 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2820 and_ior_xor:
2821 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2822 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2824 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2825 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2826 int out_prec = TYPE_PRECISION (out_type);
2827 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2828 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2829 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2830 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2831 else
2833 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2834 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2837 itype = TREE_TYPE (irhs1);
2838 pattern_stmt
2839 = gimple_build_assign_with_ops (rhs_code,
2840 vect_recog_temp_ssa_var (itype, NULL),
2841 irhs1, irhs2);
2842 break;
2844 default:
2845 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2846 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2847 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2848 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2849 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2851 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2852 itype
2853 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2855 else
2856 itype = TREE_TYPE (rhs1);
2857 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2858 if (trueval == NULL_TREE)
2859 trueval = build_int_cst (itype, 1);
2860 else
2861 gcc_checking_assert (useless_type_conversion_p (itype,
2862 TREE_TYPE (trueval)));
2863 pattern_stmt
2864 = gimple_build_assign_with_ops (COND_EXPR,
2865 vect_recog_temp_ssa_var (itype, NULL),
2866 cond_expr, trueval,
2867 build_int_cst (itype, 0));
2868 break;
2871 stmts->safe_push (stmt);
2872 gimple_set_location (pattern_stmt, loc);
2873 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2874 return gimple_assign_lhs (pattern_stmt);
2878 /* Function vect_recog_bool_pattern
2880 Try to find pattern like following:
2882 bool a_b, b_b, c_b, d_b, e_b;
2883 TYPE f_T;
2884 loop:
2885 S1 a_b = x1 CMP1 y1;
2886 S2 b_b = x2 CMP2 y2;
2887 S3 c_b = a_b & b_b;
2888 S4 d_b = x3 CMP3 y3;
2889 S5 e_b = c_b | d_b;
2890 S6 f_T = (TYPE) e_b;
2892 where type 'TYPE' is an integral type.
2894 Input:
2896 * LAST_STMT: A stmt at the end from which the pattern
2897 search begins, i.e. cast of a bool to
2898 an integer type.
2900 Output:
2902 * TYPE_IN: The type of the input arguments to the pattern.
2904 * TYPE_OUT: The type of the output of this pattern.
2906 * Return value: A new stmt that will be used to replace the pattern.
2908 Assuming size of TYPE is the same as size of all comparisons
2909 (otherwise some casts would be added where needed), the above
2910 sequence we create related pattern stmts:
2911 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2912 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2913 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2914 S5' e_T = c_T | d_T;
2915 S6' f_T = e_T;
2917 Instead of the above S3' we could emit:
2918 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2919 S3' c_T = a_T | b_T;
2920 but the above is more efficient. */
2922 static gimple
2923 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2924 tree *type_out)
2926 gimple last_stmt = stmts->pop ();
2927 enum tree_code rhs_code;
2928 tree var, lhs, rhs, vectype;
2929 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2930 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2931 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2932 gimple pattern_stmt;
2934 if (!is_gimple_assign (last_stmt))
2935 return NULL;
2937 var = gimple_assign_rhs1 (last_stmt);
2938 lhs = gimple_assign_lhs (last_stmt);
2940 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2941 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2942 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2943 return NULL;
2945 rhs_code = gimple_assign_rhs_code (last_stmt);
2946 if (CONVERT_EXPR_CODE_P (rhs_code))
2948 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2949 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2950 return NULL;
2951 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2952 if (vectype == NULL_TREE)
2953 return NULL;
2955 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2956 return NULL;
2958 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2959 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2960 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2961 pattern_stmt
2962 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2963 else
2964 pattern_stmt
2965 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2966 *type_out = vectype;
2967 *type_in = vectype;
2968 stmts->safe_push (last_stmt);
2969 if (dump_enabled_p ())
2970 dump_printf_loc (MSG_NOTE, vect_location,
2971 "vect_recog_bool_pattern: detected:\n");
2973 return pattern_stmt;
2975 else if (rhs_code == SSA_NAME
2976 && STMT_VINFO_DATA_REF (stmt_vinfo))
2978 stmt_vec_info pattern_stmt_info;
2979 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2980 gcc_assert (vectype != NULL_TREE);
2981 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2982 return NULL;
2983 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2984 return NULL;
2986 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2987 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2988 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2990 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2991 gimple cast_stmt
2992 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2993 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2994 rhs = rhs2;
2996 pattern_stmt
2997 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2998 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2999 bb_vinfo);
3000 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3001 STMT_VINFO_DATA_REF (pattern_stmt_info)
3002 = STMT_VINFO_DATA_REF (stmt_vinfo);
3003 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3004 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3005 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3006 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3007 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3008 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3009 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3010 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3011 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3012 *type_out = vectype;
3013 *type_in = vectype;
3014 stmts->safe_push (last_stmt);
3015 if (dump_enabled_p ())
3016 dump_printf_loc (MSG_NOTE, vect_location,
3017 "vect_recog_bool_pattern: detected:\n");
3018 return pattern_stmt;
3020 else
3021 return NULL;
3025 /* Mark statements that are involved in a pattern. */
3027 static inline void
3028 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3029 tree pattern_vectype)
3031 stmt_vec_info pattern_stmt_info, def_stmt_info;
3032 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3033 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3034 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3035 gimple def_stmt;
3037 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3038 if (pattern_stmt_info == NULL)
3040 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3041 bb_vinfo);
3042 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3044 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3046 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3047 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3048 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3049 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3050 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3051 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3052 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3053 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3054 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3056 gimple_stmt_iterator si;
3057 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3058 !gsi_end_p (si); gsi_next (&si))
3060 def_stmt = gsi_stmt (si);
3061 def_stmt_info = vinfo_for_stmt (def_stmt);
3062 if (def_stmt_info == NULL)
3064 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3065 bb_vinfo);
3066 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3068 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3069 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3070 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3071 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3072 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3077 /* Function vect_pattern_recog_1
3079 Input:
3080 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3081 computation pattern.
3082 STMT: A stmt from which the pattern search should start.
3084 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3085 expression that computes the same functionality and can be used to
3086 replace the sequence of stmts that are involved in the pattern.
3088 Output:
3089 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3090 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3091 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3092 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3093 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3094 to the available target pattern.
3096 This function also does some bookkeeping, as explained in the documentation
3097 for vect_recog_pattern. */
3099 static void
3100 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3101 gimple_stmt_iterator si,
3102 vec<gimple> *stmts_to_replace)
3104 gimple stmt = gsi_stmt (si), pattern_stmt;
3105 stmt_vec_info stmt_info;
3106 loop_vec_info loop_vinfo;
3107 tree pattern_vectype;
3108 tree type_in, type_out;
3109 enum tree_code code;
3110 int i;
3111 gimple next;
3113 stmts_to_replace->truncate (0);
3114 stmts_to_replace->quick_push (stmt);
3115 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3116 if (!pattern_stmt)
3117 return;
3119 stmt = stmts_to_replace->last ();
3120 stmt_info = vinfo_for_stmt (stmt);
3121 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3123 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3125 /* No need to check target support (already checked by the pattern
3126 recognition function). */
3127 pattern_vectype = type_out ? type_out : type_in;
3129 else
3131 enum machine_mode vec_mode;
3132 enum insn_code icode;
3133 optab optab;
3135 /* Check target support */
3136 type_in = get_vectype_for_scalar_type (type_in);
3137 if (!type_in)
3138 return;
3139 if (type_out)
3140 type_out = get_vectype_for_scalar_type (type_out);
3141 else
3142 type_out = type_in;
3143 if (!type_out)
3144 return;
3145 pattern_vectype = type_out;
3147 if (is_gimple_assign (pattern_stmt))
3148 code = gimple_assign_rhs_code (pattern_stmt);
3149 else
3151 gcc_assert (is_gimple_call (pattern_stmt));
3152 code = CALL_EXPR;
3155 optab = optab_for_tree_code (code, type_in, optab_default);
3156 vec_mode = TYPE_MODE (type_in);
3157 if (!optab
3158 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3159 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3160 return;
3163 /* Found a vectorizable pattern. */
3164 if (dump_enabled_p ())
3166 dump_printf_loc (MSG_NOTE, vect_location,
3167 "pattern recognized: ");
3168 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3169 dump_printf (MSG_NOTE, "\n");
3172 /* Mark the stmts that are involved in the pattern. */
3173 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3175 /* Patterns cannot be vectorized using SLP, because they change the order of
3176 computation. */
3177 if (loop_vinfo)
3178 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3179 if (next == stmt)
3180 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3182 /* It is possible that additional pattern stmts are created and inserted in
3183 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3184 relevant statements. */
3185 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3186 && (unsigned) i < (stmts_to_replace->length () - 1);
3187 i++)
3189 stmt_info = vinfo_for_stmt (stmt);
3190 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3191 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_NOTE, vect_location,
3194 "additional pattern stmt: ");
3195 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3196 dump_printf (MSG_NOTE, "\n");
3199 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3204 /* Function vect_pattern_recog
3206 Input:
3207 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3208 computation idioms.
3210 Output - for each computation idiom that is detected we create a new stmt
3211 that provides the same functionality and that can be vectorized. We
3212 also record some information in the struct_stmt_info of the relevant
3213 stmts, as explained below:
3215 At the entry to this function we have the following stmts, with the
3216 following initial value in the STMT_VINFO fields:
3218 stmt in_pattern_p related_stmt vec_stmt
3219 S1: a_i = .... - - -
3220 S2: a_2 = ..use(a_i).. - - -
3221 S3: a_1 = ..use(a_2).. - - -
3222 S4: a_0 = ..use(a_1).. - - -
3223 S5: ... = ..use(a_0).. - - -
3225 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3226 represented by a single stmt. We then:
3227 - create a new stmt S6 equivalent to the pattern (the stmt is not
3228 inserted into the code)
3229 - fill in the STMT_VINFO fields as follows:
3231 in_pattern_p related_stmt vec_stmt
3232 S1: a_i = .... - - -
3233 S2: a_2 = ..use(a_i).. - - -
3234 S3: a_1 = ..use(a_2).. - - -
3235 S4: a_0 = ..use(a_1).. true S6 -
3236 '---> S6: a_new = .... - S4 -
3237 S5: ... = ..use(a_0).. - - -
3239 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3240 to each other through the RELATED_STMT field).
3242 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3243 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3244 remain irrelevant unless used by stmts other than S4.
3246 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3247 (because they are marked as irrelevant). It will vectorize S6, and record
3248 a pointer to the new vector stmt VS6 from S6 (as usual).
3249 S4 will be skipped, and S5 will be vectorized as usual:
3251 in_pattern_p related_stmt vec_stmt
3252 S1: a_i = .... - - -
3253 S2: a_2 = ..use(a_i).. - - -
3254 S3: a_1 = ..use(a_2).. - - -
3255 > VS6: va_new = .... - - -
3256 S4: a_0 = ..use(a_1).. true S6 VS6
3257 '---> S6: a_new = .... - S4 VS6
3258 > VS5: ... = ..vuse(va_new).. - - -
3259 S5: ... = ..use(a_0).. - - -
3261 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3262 elsewhere), and we'll end up with:
3264 VS6: va_new = ....
3265 VS5: ... = ..vuse(va_new)..
3267 In case of more than one pattern statements, e.g., widen-mult with
3268 intermediate type:
3270 S1 a_t = ;
3271 S2 a_T = (TYPE) a_t;
3272 '--> S3: a_it = (interm_type) a_t;
3273 S4 prod_T = a_T * CONST;
3274 '--> S5: prod_T' = a_it w* CONST;
3276 there may be other users of a_T outside the pattern. In that case S2 will
3277 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3278 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3279 be recorded in S3. */
3281 void
3282 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3284 struct loop *loop;
3285 basic_block *bbs;
3286 unsigned int nbbs;
3287 gimple_stmt_iterator si;
3288 unsigned int i, j;
3289 vect_recog_func_ptr vect_recog_func;
3290 auto_vec<gimple, 1> stmts_to_replace;
3291 gimple stmt;
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_NOTE, vect_location,
3295 "=== vect_pattern_recog ===\n");
3297 if (loop_vinfo)
3299 loop = LOOP_VINFO_LOOP (loop_vinfo);
3300 bbs = LOOP_VINFO_BBS (loop_vinfo);
3301 nbbs = loop->num_nodes;
3303 else
3305 bbs = &BB_VINFO_BB (bb_vinfo);
3306 nbbs = 1;
3309 /* Scan through the loop stmts, applying the pattern recognition
3310 functions starting at each stmt visited: */
3311 for (i = 0; i < nbbs; i++)
3313 basic_block bb = bbs[i];
3314 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3316 if (bb_vinfo && (stmt = gsi_stmt (si))
3317 && vinfo_for_stmt (stmt)
3318 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3319 continue;
3321 /* Scan over all generic vect_recog_xxx_pattern functions. */
3322 for (j = 0; j < NUM_PATTERNS; j++)
3324 vect_recog_func = vect_vect_recog_func_ptrs[j];
3325 vect_pattern_recog_1 (vect_recog_func, si,
3326 &stmts_to_replace);