/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"
/* Pattern recognition functions.  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper that returns a new temporary ssa-name of TYPE for a pattern
   statement STMT.  If STMT is NULL, the caller must set SSA_NAME_DEF_STMT
   for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
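/* For illustration, a scalar source loop of the kind this function is
   meant to recognize (a sketch only; the array names, element types and
   trip count here are hypothetical, not taken from this file):

     short a[256], b[256];
     int i, sum = 0;
     for (i = 0; i < 256; i++)
       sum += a[i] * b[i];	// S3-S7: promote to int, multiply, accumulate

   Here 'type' is short and TYPE1/TYPE2 are int (the operands are promoted
   to int by the usual arithmetic conversions), so the whole chain can
   collapse into one DOT_PROD_EXPR per vector of inputs.  */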
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
	return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
	{
	  stmt = def_stmt;
	  oprnd0 = gimple_assign_rhs1 (stmt);
	}
      else
	half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
	return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
	return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
	return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
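/* As a concrete sketch (hypothetical types, not taken from this file):
   for 'unsigned char a' widened to 'unsigned int' and shifted left by 9,
   the shift amount does not fit the 8-bit HALF_TYPE, but TYPE is 4 times
   bigger than HALF_TYPE and 9 fits the 16-bit intermediate type, so the
   promotion is rewritten as a_it = (unsigned short) a_t and the widening
   shift is performed from unsigned short instead.  */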
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
	   '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
	   '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */
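/* For illustration, a scalar loop this function is intended to match
   (a sketch; names and bounds are hypothetical):

     short a[256], b[256];
     int c[256], i;
     for (i = 0; i < 256; i++)
       c[i] = a[i] * b[i];	// S3-S5: both operands widened, then multiplied

   The two promotions and the multiplication become a single
   WIDEN_MULT_EXPR on the short inputs.  */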
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
					       old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
	= gimple_build_assign_with_ops (NOP_EXPR,
					vect_recog_temp_ssa_var (type, NULL),
					gimple_assign_lhs (pattern_stmt),
					NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi or powif, and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	x = x * x
   or
	x = sqrt (x)  */
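/* For illustration (a sketch; hypothetical code, not from this file):

     double y = pow (x, 2.0);	// rewritten as y = x * x
     double z = pow (x, 0.5);	// rewritten as z = sqrt (x)

   The first form becomes a plain multiplication; the second becomes a
   call to sqrt, kept only if the target can vectorize that call.  */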
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
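/* For illustration, a scalar reduction of this shape (a sketch;
   hypothetical names and types, not taken from this file):

     short a[256];
     int i, sum = 0;
     for (i = 0; i < 256; i++)
       sum += a[i];		// S2-S3: widen to int, then accumulate

   The promotion plus addition collapses into a single
   WIDEN_SUM_EXPR <x_t, sum_0>.  */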
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In the
	 latter case DEF is used to replace that operand.  (It is defined by a
	 pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
	 NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be created
	 in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the second
	 one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
	 the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type
			  (TYPE_PRECISION (half_type) * 2,
			   TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type
		      (TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type
		      (TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type, NULL);
	      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       oprnd, NULL_TREE);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type, NULL);
	  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						   oprnd, NULL_TREE);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and C1
   are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
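/* For illustration, a sequence of the kind handled here (a sketch;
   hypothetical names, types and constants, not taken from this file):

     unsigned char a[256], b[256];
     int i;
     for (i = 0; i < 256; i++)
       b[i] = (a[i] << 1) | 3;	// S2-S5: computed in int only because of
				// C integer promotion, then demoted back

   Both operations fit an 8-bit (or a 16-bit intermediate) type, so the
   vectorizer can use the narrower elements and process more of them per
   vector instruction.  */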
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 a case we associate the last pattern statement with the last
	 statement in the sequence.  Therefore, we only add the original
	 statement to the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
	  pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       var, NULL_TREE);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

     type a_t;
     TYPE a_T, res_T;
     result_type res_result;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;
     S4 res_result = (result_type) res_T;
	 '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
	 '--> a_it = (interm_type) a_t;
     S3 res_T = a_T << CONST;
	 '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
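/* For illustration (a sketch; hypothetical names and types, not taken
   from this file):

     short a[256];
     int b[256], i;
     for (i = 0; i < 256; i++)
       b[i] = a[i] << 8;	// S2-S3: promote to int, then shift

   The promotion and the shift combine into a single
   WIDEN_LSHIFT_EXPR <a_t, 8> on the short input.  */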
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: it has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt
    = gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

     type a_t, b_t, c_t;

     S0 a_t = b_t r<< c_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
     with a sequence:

     S1 d_t = -c_t;
     S2 e_t = d_t & (B - 1);
     S3 f_t = b_t << c_t;
     S4 g_t = b_t >> e_t;
     S0 a_t = f_t | g_t;

   where B is the element bitsize of type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the rotate
     S0 stmt.  */
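/* For illustration (a sketch; hypothetical names and types, not taken
   from this file):

     unsigned int x, n;
     x = (x << n) | (x >> ((-n) & 31));	// folded earlier into x r<< n

   On a target without vector rotates, the rotate is re-expanded here as
   the S1-S4 shift/mask sequence above, which needs only vector shifts
   and a bitwise OR.  */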
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
					       gimple_assign_lhs (def_stmt),
					       mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? LSHIFT_EXPR : RSHIFT_EXPR,
					   var1, oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
					   ? RSHIFT_EXPR : LSHIFT_EXPR,
					   var2, oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     type a_t;
     TYPE b_T, res_T;

     S1 a_t = ;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   where type 'TYPE' is a type with different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases:

     type a_t;
     TYPE b_T, c_T, res_T;

     S0 c_T = ;
     S1 a_t = (type) c_T;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
     with a shift/rotate which has the same type on both operands, in the
     second case just b_T op c_T, in the first case with added cast
     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
     S3 stmt.  */
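/* For illustration (a sketch; hypothetical names and types, not taken
   from this file):

     long long b[256], res[256];
     int a[256], i;
     for (i = 0; i < 256; i++)
       res[i] = b[i] << a[i];	// S3: shift amount type differs from 'TYPE'

   Vector shift instructions want both operands in the same vector type,
   so the pattern adds a cast of a_t to the type of b_T (or reuses c_T
   when a_t was itself cast from a value of that type).  */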
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
1931 /* Detect a signed division by a constant that wouldn't otherwise
1932 be vectorized:
1934 type a_t, b_t;
1936 S1 a_t = b_t / N;
1938 where type 'type' is an integral type and N is a constant.
1940 Similarly handle modulo by a constant:
1942 S4 a_t = b_t % N;
1944 Input/Output:
1946 * STMTS: Contains a stmt from which the pattern search begins,
1947 i.e. the division stmt. S1 is replaced, if N is a power-of-two
1948 constant and the type is signed, by:
1949 S3 y_t = b_t < 0 ? N - 1 : 0;
1950 S2 x_t = b_t + y_t;
1951 S1' a_t = x_t >> log2 (N);
1953 S4 is replaced, if N is a power-of-two constant and the type
1954 is signed, by (where *_T temporaries have unsigned type):
1955 S9 y_T = b_t < 0 ? -1U : 0U;
1956 S8 z_T = y_T >> (sizeof (type) * CHAR_BIT - log2 (N));
1957 S7 z_t = (type) z_T;
1958 S6 w_t = b_t + z_t;
1959 S5 x_t = w_t & (N - 1);
1960 S4' a_t = x_t - z_t;
1962 Output:
1964 * TYPE_IN: The type of the input arguments to the pattern.
1966 * TYPE_OUT: The type of the output of this pattern.
1968 * Return value: A new stmt that will be used to replace the division
1969 S1 or modulo S4 stmt. */
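/* As a concrete sketch (not part of this file), for type = int and
   N = 8 the division S1 becomes, relying on GCC's arithmetic right
   shift of signed values:

     int y_t = b_t < 0 ? 7 : 0;       // S3: N - 1
     int x_t = b_t + y_t;             // S2
     int a_t = x_t >> 3;              // S1': log2 (8) == 3

   e.g. b_t == -9 gives y_t == 7, x_t == -2, a_t == -1, matching
   truncating division.  */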
1971 static gimple
1972 vect_recog_divmod_pattern (vec<gimple> *stmts,
1973 tree *type_in, tree *type_out)
1975 gimple last_stmt = stmts->pop ();
1976 tree oprnd0, oprnd1, vectype, itype, cond;
1977 gimple pattern_stmt, def_stmt;
1978 enum tree_code rhs_code;
1979 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1980 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1981 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1982 optab optab;
1983 tree q;
1984 int dummy_int, prec;
1985 stmt_vec_info def_stmt_vinfo;
1987 if (!is_gimple_assign (last_stmt))
1988 return NULL;
1990 rhs_code = gimple_assign_rhs_code (last_stmt);
1991 switch (rhs_code)
1993 case TRUNC_DIV_EXPR:
1994 case TRUNC_MOD_EXPR:
1995 break;
1996 default:
1997 return NULL;
2000 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2001 return NULL;
2003 oprnd0 = gimple_assign_rhs1 (last_stmt);
2004 oprnd1 = gimple_assign_rhs2 (last_stmt);
2005 itype = TREE_TYPE (oprnd0);
2006 if (TREE_CODE (oprnd0) != SSA_NAME
2007 || TREE_CODE (oprnd1) != INTEGER_CST
2008 || TREE_CODE (itype) != INTEGER_TYPE
2009 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2010 return NULL;
2012 vectype = get_vectype_for_scalar_type (itype);
2013 if (vectype == NULL_TREE)
2014 return NULL;
2016 /* If the target can handle vectorized division or modulo natively,
2017 don't attempt to optimize this. */
2018 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2019 if (optab != unknown_optab)
2021 enum machine_mode vec_mode = TYPE_MODE (vectype);
2022 int icode = (int) optab_handler (optab, vec_mode);
2023 if (icode != CODE_FOR_nothing)
2024 return NULL;
2027 prec = TYPE_PRECISION (itype);
2028 if (integer_pow2p (oprnd1))
2030 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2031 return NULL;
2033 /* Pattern detected. */
2034 if (dump_enabled_p ())
2035 dump_printf_loc (MSG_NOTE, vect_location,
2036 "vect_recog_divmod_pattern: detected:\n");
2038 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2039 build_int_cst (itype, 0));
2040 if (rhs_code == TRUNC_DIV_EXPR)
2042 tree var = vect_recog_temp_ssa_var (itype, NULL);
2043 tree shift;
2044 def_stmt
2045 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2046 fold_build2 (MINUS_EXPR, itype,
2047 oprnd1,
2048 build_int_cst (itype,
2049 1)),
2050 build_int_cst (itype, 0));
2051 new_pattern_def_seq (stmt_vinfo, def_stmt);
2052 var = vect_recog_temp_ssa_var (itype, NULL);
2053 def_stmt
2054 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2055 gimple_assign_lhs (def_stmt));
2056 append_pattern_def_seq (stmt_vinfo, def_stmt);
2058 shift = build_int_cst (itype, tree_log2 (oprnd1));
2059 pattern_stmt
2060 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2061 vect_recog_temp_ssa_var (itype,
2062 NULL),
2063 var, shift);
2065 else
2067 tree signmask;
2068 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2069 if (compare_tree_int (oprnd1, 2) == 0)
2071 signmask = vect_recog_temp_ssa_var (itype, NULL);
2072 def_stmt
2073 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2074 build_int_cst (itype, 1),
2075 build_int_cst (itype, 0));
2076 append_pattern_def_seq (stmt_vinfo, def_stmt);
2078 else
2080 tree utype
2081 = build_nonstandard_integer_type (prec, 1);
2082 tree vecutype = get_vectype_for_scalar_type (utype);
2083 tree shift
2084 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2085 - tree_log2 (oprnd1));
2086 tree var = vect_recog_temp_ssa_var (utype, NULL);
2088 def_stmt
2089 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2090 build_int_cst (utype, -1),
2091 build_int_cst (utype, 0));
2092 def_stmt_vinfo
2093 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2094 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2095 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2096 append_pattern_def_seq (stmt_vinfo, def_stmt);
2097 var = vect_recog_temp_ssa_var (utype, NULL);
2098 def_stmt
2099 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2100 gimple_assign_lhs (def_stmt),
2101 shift);
2102 def_stmt_vinfo
2103 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2104 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2105 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2106 append_pattern_def_seq (stmt_vinfo, def_stmt);
2107 signmask = vect_recog_temp_ssa_var (itype, NULL);
2108 def_stmt
2109 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2110 NULL_TREE);
2111 append_pattern_def_seq (stmt_vinfo, def_stmt);
2113 def_stmt
2114 = gimple_build_assign_with_ops (PLUS_EXPR,
2115 vect_recog_temp_ssa_var (itype,
2116 NULL),
2117 oprnd0, signmask);
2118 append_pattern_def_seq (stmt_vinfo, def_stmt);
2119 def_stmt
2120 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2121 vect_recog_temp_ssa_var (itype,
2122 NULL),
2123 gimple_assign_lhs (def_stmt),
2124 fold_build2 (MINUS_EXPR, itype,
2125 oprnd1,
2126 build_int_cst (itype,
2127 1)));
2128 append_pattern_def_seq (stmt_vinfo, def_stmt);
2130 pattern_stmt
2131 = gimple_build_assign_with_ops (MINUS_EXPR,
2132 vect_recog_temp_ssa_var (itype,
2133 NULL),
2134 gimple_assign_lhs (def_stmt),
2135 signmask);
2138 if (dump_enabled_p ())
2139 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2142 stmts->safe_push (last_stmt);
2144 *type_in = vectype;
2145 *type_out = vectype;
2146 return pattern_stmt;
2149 if (prec > HOST_BITS_PER_WIDE_INT
2150 || integer_zerop (oprnd1))
2151 return NULL;
2153 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2154 return NULL;
2156 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2158 if (TYPE_UNSIGNED (itype))
2160 unsigned HOST_WIDE_INT mh, ml;
2161 int pre_shift, post_shift;
2162 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2163 & GET_MODE_MASK (TYPE_MODE (itype)));
2164 tree t1, t2, t3, t4;
2166 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2167 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2168 return NULL;
2170 /* Find a suitable multiplier and right shift count
2171 instead of multiplying with D. */
2172 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2174 /* If the suggested multiplier is more than SIZE bits, we can do better
2175 for even divisors, using an initial right shift. */
2176 if (mh != 0 && (d & 1) == 0)
2178 pre_shift = floor_log2 (d & -d);
2179 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2180 &ml, &post_shift, &dummy_int);
2181 gcc_assert (!mh);
2183 else
2184 pre_shift = 0;
2186 if (mh != 0)
2188 if (post_shift - 1 >= prec)
2189 return NULL;
2191 /* t1 = oprnd0 h* ml;
2192 t2 = oprnd0 - t1;
2193 t3 = t2 >> 1;
2194 t4 = t1 + t3;
2195 q = t4 >> (post_shift - 1); */
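/* For instance, for a 32-bit unsigned division by 7 choose_multiplier
   is expected to pick ml = 0x24924925 with post_shift = 3, so that
   t1 = (oprnd0 * 0x24924925) >> 32 and
   q = (t1 + ((oprnd0 - t1) >> 1)) >> 2
   (a worked sketch of the classic Granlund/Montgomery scheme; the
   exact constants come from choose_multiplier).  */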
2196 t1 = vect_recog_temp_ssa_var (itype, NULL);
2197 def_stmt
2198 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2199 build_int_cst (itype, ml));
2200 append_pattern_def_seq (stmt_vinfo, def_stmt);
2202 t2 = vect_recog_temp_ssa_var (itype, NULL);
2203 def_stmt
2204 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2205 append_pattern_def_seq (stmt_vinfo, def_stmt);
2207 t3 = vect_recog_temp_ssa_var (itype, NULL);
2208 def_stmt
2209 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2210 integer_one_node);
2211 append_pattern_def_seq (stmt_vinfo, def_stmt);
2213 t4 = vect_recog_temp_ssa_var (itype, NULL);
2214 def_stmt
2215 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2217 if (post_shift != 1)
2219 append_pattern_def_seq (stmt_vinfo, def_stmt);
2221 q = vect_recog_temp_ssa_var (itype, NULL);
2222 pattern_stmt
2223 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2224 build_int_cst (itype,
2225 post_shift
2226 - 1));
2228 else
2230 q = t4;
2231 pattern_stmt = def_stmt;
2234 else
2236 if (pre_shift >= prec || post_shift >= prec)
2237 return NULL;
2239 /* t1 = oprnd0 >> pre_shift;
2240 t2 = t1 h* ml;
2241 q = t2 >> post_shift; */
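/* E.g. for a 32-bit unsigned division by 12 (a sketch): pre_shift = 2
   and choose_multiplier for the remaining division by 3 is expected
   to yield ml = 0xAAAAAAAB with post_shift = 1, i.e.
   q = (((oprnd0 >> 2) * 0xAAAAAAAB) >> 32) >> 1.  */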
2242 if (pre_shift)
2244 t1 = vect_recog_temp_ssa_var (itype, NULL);
2245 def_stmt
2246 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2247 build_int_cst (NULL,
2248 pre_shift));
2249 append_pattern_def_seq (stmt_vinfo, def_stmt);
2251 else
2252 t1 = oprnd0;
2254 t2 = vect_recog_temp_ssa_var (itype, NULL);
2255 def_stmt
2256 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2257 build_int_cst (itype, ml));
2259 if (post_shift)
2261 append_pattern_def_seq (stmt_vinfo, def_stmt);
2263 q = vect_recog_temp_ssa_var (itype, NULL);
2264 def_stmt
2265 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2266 build_int_cst (itype,
2267 post_shift));
2269 else
2270 q = t2;
2272 pattern_stmt = def_stmt;
2275 else
2277 unsigned HOST_WIDE_INT ml;
2278 int post_shift;
2279 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2280 unsigned HOST_WIDE_INT abs_d;
2281 bool add = false;
2282 tree t1, t2, t3, t4;
2284 /* Give up for -1. */
2285 if (d == -1)
2286 return NULL;
2288 /* Since d might be INT_MIN, we have to cast to
2289 unsigned HOST_WIDE_INT before negating to avoid
2290 undefined signed overflow. */
2291 abs_d = (d >= 0
2292 ? (unsigned HOST_WIDE_INT) d
2293 : - (unsigned HOST_WIDE_INT) d);
2295 /* n rem d = n rem -d */
2296 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2298 d = abs_d;
2299 oprnd1 = build_int_cst (itype, abs_d);
2301 else if (HOST_BITS_PER_WIDE_INT >= prec
2302 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2303 /* This case is not handled correctly below. */
2304 return NULL;
2306 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2307 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2309 add = true;
2310 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2312 if (post_shift >= prec)
2313 return NULL;
2315 /* t1 = oprnd0 h* ml; */
2316 t1 = vect_recog_temp_ssa_var (itype, NULL);
2317 def_stmt
2318 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2319 build_int_cst (itype, ml));
2321 if (add)
2323 /* t2 = t1 + oprnd0; */
2324 append_pattern_def_seq (stmt_vinfo, def_stmt);
2325 t2 = vect_recog_temp_ssa_var (itype, NULL);
2326 def_stmt
2327 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2329 else
2330 t2 = t1;
2332 if (post_shift)
2334 /* t3 = t2 >> post_shift; */
2335 append_pattern_def_seq (stmt_vinfo, def_stmt);
2336 t3 = vect_recog_temp_ssa_var (itype, NULL);
2337 def_stmt
2338 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2339 build_int_cst (itype, post_shift));
2341 else
2342 t3 = t2;
2344 wide_int oprnd0_min, oprnd0_max;
2345 int msb = 1;
2346 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2348 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2349 msb = 0;
2350 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2351 msb = -1;
2354 if (msb == 0 && d >= 0)
2356 /* q = t3; */
2357 q = t3;
2358 pattern_stmt = def_stmt;
2360 else
2362 /* t4 = oprnd0 >> (prec - 1);
2363 or if we know from VRP that oprnd0 >= 0
2364 t4 = 0;
2365 or if we know from VRP that oprnd0 < 0
2366 t4 = -1; */
2367 append_pattern_def_seq (stmt_vinfo, def_stmt);
2368 t4 = vect_recog_temp_ssa_var (itype, NULL);
2369 if (msb != 1)
2370 def_stmt
2371 = gimple_build_assign_with_ops (INTEGER_CST,
2372 t4, build_int_cst (itype, msb),
2373 NULL_TREE);
2374 else
2375 def_stmt
2376 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2377 build_int_cst (itype, prec - 1));
2378 append_pattern_def_seq (stmt_vinfo, def_stmt);
2380 /* q = t3 - t4; or q = t4 - t3; */
2381 q = vect_recog_temp_ssa_var (itype, NULL);
2382 pattern_stmt
2383 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2384 d < 0 ? t3 : t4);
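/* Putting the signed case together for a 32-bit int divided by 7
   (a sketch; the constants are what choose_multiplier is expected
   to produce):

     t1 = oprnd0 h* 0x92492493;   // ml has the sign bit set, so ...
     t2 = t1 + oprnd0;            // ... the add correction is used
     t3 = t2 >> 2;                // post_shift == 2
     t4 = oprnd0 >> 31;           // sign of oprnd0, unless VRP knows it
     q  = t3 - t4;                // d > 0, hence t3 - t4  */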
2388 if (rhs_code == TRUNC_MOD_EXPR)
2390 tree r, t1;
2392 /* We divided. Now finish by:
2393 t1 = q * oprnd1;
2394 r = oprnd0 - t1; */
2395 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2397 t1 = vect_recog_temp_ssa_var (itype, NULL);
2398 def_stmt
2399 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2400 append_pattern_def_seq (stmt_vinfo, def_stmt);
2402 r = vect_recog_temp_ssa_var (itype, NULL);
2403 pattern_stmt
2404 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
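/* E.g. 23 % 7: the division above gives q == 3, so t1 == 21 and
   r == 2.  */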
2407 /* Pattern detected. */
2408 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_NOTE, vect_location,
2411 "vect_recog_divmod_pattern: detected: ");
2412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2413 dump_printf (MSG_NOTE, "\n");
2416 stmts->safe_push (last_stmt);
2418 *type_in = vectype;
2419 *type_out = vectype;
2420 return pattern_stmt;
2423 /* Function vect_recog_mixed_size_cond_pattern
2425 Try to find the following pattern:
2427 type x_t, y_t;
2428 TYPE a_T, b_T, c_T;
2429 loop:
2430 S1 a_T = x_t CMP y_t ? b_T : c_T;
2432 where type 'TYPE' is an integral type which has a different size
2433 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2434 than 'type', the constants need to fit into an integer type
2435 with the same width as 'type') or results of conversion from 'type'.
2437 Input:
2439 * LAST_STMT: A stmt from which the pattern search begins.
2441 Output:
2443 * TYPE_IN: The type of the input arguments to the pattern.
2445 * TYPE_OUT: The type of the output of this pattern.
2447 * Return value: A new stmt that will be used to replace the pattern.
2448 Additionally a def_stmt is added.
2450 a_it = x_t CMP y_t ? b_it : c_it;
2451 a_T = (TYPE) a_it; */
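/* A scalar sketch of the transform with type = short and TYPE = int
   (hypothetical variables, not part of this file):

     short x_t, y_t;
     int a_T;
     a_T = x_t < y_t ? 1 : 2;          // S1: mixed-size COND_EXPR

   becomes

     short a_it = x_t < y_t ? 1 : 2;   // comparison-sized COND_EXPR
     a_T = (int) a_it;                 // widening cast, the pattern stmt

   so the COND_EXPR can be vectorized with the comparison's vector
   type and only the cast needs the wider type.  */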
2453 static gimple
2454 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2455 tree *type_out)
2457 gimple last_stmt = (*stmts)[0];
2458 tree cond_expr, then_clause, else_clause;
2459 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2460 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2461 enum machine_mode cmpmode;
2462 gimple pattern_stmt, def_stmt;
2463 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2464 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2465 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2466 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2467 bool promotion;
2468 tree comp_scalar_type;
2470 if (!is_gimple_assign (last_stmt)
2471 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2472 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2473 return NULL;
2475 cond_expr = gimple_assign_rhs1 (last_stmt);
2476 then_clause = gimple_assign_rhs2 (last_stmt);
2477 else_clause = gimple_assign_rhs3 (last_stmt);
2479 if (!COMPARISON_CLASS_P (cond_expr))
2480 return NULL;
2482 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2483 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2484 if (comp_vectype == NULL_TREE)
2485 return NULL;
2487 type = gimple_expr_type (last_stmt);
2488 if (types_compatible_p (type, comp_scalar_type)
2489 || ((TREE_CODE (then_clause) != INTEGER_CST
2490 || TREE_CODE (else_clause) != INTEGER_CST)
2491 && !INTEGRAL_TYPE_P (comp_scalar_type))
2492 || !INTEGRAL_TYPE_P (type))
2493 return NULL;
2495 if ((TREE_CODE (then_clause) != INTEGER_CST
2496 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2497 &def_stmt0, &promotion))
2498 || (TREE_CODE (else_clause) != INTEGER_CST
2499 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2500 &def_stmt1, &promotion)))
2501 return NULL;
2503 if (orig_type0 && orig_type1
2504 && !types_compatible_p (orig_type0, orig_type1))
2505 return NULL;
2507 if (orig_type0)
2509 if (!types_compatible_p (orig_type0, comp_scalar_type))
2510 return NULL;
2511 then_clause = gimple_assign_rhs1 (def_stmt0);
2512 itype = orig_type0;
2515 if (orig_type1)
2517 if (!types_compatible_p (orig_type1, comp_scalar_type))
2518 return NULL;
2519 else_clause = gimple_assign_rhs1 (def_stmt1);
2520 itype = orig_type1;
2523 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2525 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2526 return NULL;
2528 vectype = get_vectype_for_scalar_type (type);
2529 if (vectype == NULL_TREE)
2530 return NULL;
2532 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2533 return NULL;
2535 if (itype == NULL_TREE)
2536 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2537 TYPE_UNSIGNED (type));
2539 if (itype == NULL_TREE
2540 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2541 return NULL;
2543 vecitype = get_vectype_for_scalar_type (itype);
2544 if (vecitype == NULL_TREE)
2545 return NULL;
2547 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2548 return NULL;
2550 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2552 if ((TREE_CODE (then_clause) == INTEGER_CST
2553 && !int_fits_type_p (then_clause, itype))
2554 || (TREE_CODE (else_clause) == INTEGER_CST
2555 && !int_fits_type_p (else_clause, itype)))
2556 return NULL;
2559 def_stmt
2560 = gimple_build_assign_with_ops (COND_EXPR,
2561 vect_recog_temp_ssa_var (itype, NULL),
2562 unshare_expr (cond_expr),
2563 fold_convert (itype, then_clause),
2564 fold_convert (itype, else_clause));
2565 pattern_stmt
2566 = gimple_build_assign_with_ops (NOP_EXPR,
2567 vect_recog_temp_ssa_var (type, NULL),
2568 gimple_assign_lhs (def_stmt), NULL_TREE);
2570 new_pattern_def_seq (stmt_vinfo, def_stmt);
2571 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2572 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2573 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2574 *type_in = vecitype;
2575 *type_out = vectype;
2577 if (dump_enabled_p ())
2578 dump_printf_loc (MSG_NOTE, vect_location,
2579 "vect_recog_mixed_size_cond_pattern: detected:\n");
2581 return pattern_stmt;
2585 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2586 true if bool VAR can be optimized that way. */
2588 static bool
2589 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2591 gimple def_stmt;
2592 enum vect_def_type dt;
2593 tree def, rhs1;
2594 enum tree_code rhs_code;
2596 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2597 &dt))
2598 return false;
2600 if (dt != vect_internal_def)
2601 return false;
2603 if (!is_gimple_assign (def_stmt))
2604 return false;
2606 if (!has_single_use (def))
2607 return false;
2609 rhs1 = gimple_assign_rhs1 (def_stmt);
2610 rhs_code = gimple_assign_rhs_code (def_stmt);
2611 switch (rhs_code)
2613 case SSA_NAME:
2614 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2616 CASE_CONVERT:
2617 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2618 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2619 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2620 return false;
2621 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2623 case BIT_NOT_EXPR:
2624 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2626 case BIT_AND_EXPR:
2627 case BIT_IOR_EXPR:
2628 case BIT_XOR_EXPR:
2629 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2630 return false;
2631 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2632 bb_vinfo);
2634 default:
2635 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2637 tree vecitype, comp_vectype;
2639 /* If the comparison can throw, then is_gimple_condexpr will be
2640 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2641 if (stmt_could_throw_p (def_stmt))
2642 return false;
2644 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2645 if (comp_vectype == NULL_TREE)
2646 return false;
2648 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2650 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2651 tree itype
2652 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2653 vecitype = get_vectype_for_scalar_type (itype);
2654 if (vecitype == NULL_TREE)
2655 return false;
2657 else
2658 vecitype = comp_vectype;
2659 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2661 return false;
2666 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of a previous
2667 stmt's result (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from
2668 RELATED_STMT to PATTERN_DEF_SEQ and making the cast the new RELATED_STMT. */
2670 static tree
2671 adjust_bool_pattern_cast (tree type, tree var)
2673 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2674 gimple cast_stmt, pattern_stmt;
2676 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2677 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2678 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2679 cast_stmt
2680 = gimple_build_assign_with_ops (NOP_EXPR,
2681 vect_recog_temp_ssa_var (type, NULL),
2682 gimple_assign_lhs (pattern_stmt),
2683 NULL_TREE);
2684 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2685 return gimple_assign_lhs (cast_stmt);
2689 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2690 recursively. VAR is an SSA_NAME that should be transformed from bool
2691 to a wider integer type. OUT_TYPE is the desired final integer type of
2692 the whole pattern. TRUEVAL should be NULL unless we are optimizing a
2693 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2694 in the then_clause. STMTS is where statements with added pattern stmts
2695 should be pushed to. */
2697 static tree
2698 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2699 vec<gimple> *stmts)
2701 gimple stmt = SSA_NAME_DEF_STMT (var);
2702 enum tree_code rhs_code, def_rhs_code;
2703 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2704 location_t loc;
2705 gimple pattern_stmt, def_stmt;
2707 rhs1 = gimple_assign_rhs1 (stmt);
2708 rhs2 = gimple_assign_rhs2 (stmt);
2709 rhs_code = gimple_assign_rhs_code (stmt);
2710 loc = gimple_location (stmt);
2711 switch (rhs_code)
2713 case SSA_NAME:
2714 CASE_CONVERT:
2715 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2716 itype = TREE_TYPE (irhs1);
2717 pattern_stmt
2718 = gimple_build_assign_with_ops (SSA_NAME,
2719 vect_recog_temp_ssa_var (itype, NULL),
2720 irhs1, NULL_TREE);
2721 break;
2723 case BIT_NOT_EXPR:
2724 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2725 itype = TREE_TYPE (irhs1);
2726 pattern_stmt
2727 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2728 vect_recog_temp_ssa_var (itype, NULL),
2729 irhs1, build_int_cst (itype, 1));
2730 break;
2732 case BIT_AND_EXPR:
2733 /* Try to optimize x = y & (a < b ? 1 : 0); into
2734 x = (a < b ? y : 0);
2736 E.g. for:
2737 bool a_b, b_b, c_b;
2738 TYPE d_T;
2740 S1 a_b = x1 CMP1 y1;
2741 S2 b_b = x2 CMP2 y2;
2742 S3 c_b = a_b & b_b;
2743 S4 d_T = (TYPE) c_b;
2745 we would normally emit:
2747 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2748 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2749 S3' c_T = a_T & b_T;
2750 S4' d_T = c_T;
2752 but we can save one stmt by using the
2753 result of one of the COND_EXPRs in the other COND_EXPR and leaving
2754 the BIT_AND_EXPR stmt out:
2756 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2757 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2758 S4' d_T = c_T;
2760 At least when VEC_COND_EXPR is implemented using masks
2761 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2762 computes the comparison mask and ANDs it, in one case with an
2763 all-ones vector, in the other case with a vector register.
2764 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2765 often more expensive. */
2766 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2767 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2768 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2770 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2771 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2772 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2773 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2775 gimple tstmt;
2776 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2777 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2778 tstmt = stmts->pop ();
2779 gcc_assert (tstmt == def_stmt);
2780 stmts->quick_push (stmt);
2781 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2782 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2783 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2784 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2785 return irhs2;
2787 else
2788 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2789 goto and_ior_xor;
2791 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2792 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2793 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2795 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2796 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2797 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2798 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2800 gimple tstmt;
2801 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2802 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2803 tstmt = stmts->pop ();
2804 gcc_assert (tstmt == def_stmt);
2805 stmts->quick_push (stmt);
2806 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2807 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2808 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2809 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2810 return irhs1;
2812 else
2813 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2814 goto and_ior_xor;
2816 /* FALLTHRU */
2817 case BIT_IOR_EXPR:
2818 case BIT_XOR_EXPR:
2819 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2820 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2821 and_ior_xor:
2822 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2823 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2825 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2826 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2827 int out_prec = TYPE_PRECISION (out_type);
2828 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2829 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2830 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2831 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2832 else
2834 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2835 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2838 itype = TREE_TYPE (irhs1);
2839 pattern_stmt
2840 = gimple_build_assign_with_ops (rhs_code,
2841 vect_recog_temp_ssa_var (itype, NULL),
2842 irhs1, irhs2);
2843 break;
2845 default:
2846 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2847 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2848 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2849 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2850 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2852 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2853 itype
2854 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2856 else
2857 itype = TREE_TYPE (rhs1);
2858 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2859 if (trueval == NULL_TREE)
2860 trueval = build_int_cst (itype, 1);
2861 else
2862 gcc_checking_assert (useless_type_conversion_p (itype,
2863 TREE_TYPE (trueval)));
2864 pattern_stmt
2865 = gimple_build_assign_with_ops (COND_EXPR,
2866 vect_recog_temp_ssa_var (itype, NULL),
2867 cond_expr, trueval,
2868 build_int_cst (itype, 0));
2869 break;
2872 stmts->safe_push (stmt);
2873 gimple_set_location (pattern_stmt, loc);
2874 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2875 return gimple_assign_lhs (pattern_stmt);
2879 /* Function vect_recog_bool_pattern
2881 Try to find a pattern like the following:
2883 bool a_b, b_b, c_b, d_b, e_b;
2884 TYPE f_T;
2885 loop:
2886 S1 a_b = x1 CMP1 y1;
2887 S2 b_b = x2 CMP2 y2;
2888 S3 c_b = a_b & b_b;
2889 S4 d_b = x3 CMP3 y3;
2890 S5 e_b = c_b | d_b;
2891 S6 f_T = (TYPE) e_b;
2893 where type 'TYPE' is an integral type. Or a similar pattern
2894 ending in
2896 S6 f_Y = e_b ? r_Y : s_Y;
2898 as results from if-conversion of a complex condition.
2900 Input:
2902 * LAST_STMT: A stmt at the end from which the pattern
2903 search begins, i.e. cast of a bool to
2904 an integer type.
2906 Output:
2908 * TYPE_IN: The type of the input arguments to the pattern.
2910 * TYPE_OUT: The type of the output of this pattern.
2912 * Return value: A new stmt that will be used to replace the pattern.
2914 Assuming the size of TYPE is the same as the size of all comparisons
2915 (otherwise some casts would be added where needed), for the above
2916 sequence we create the related pattern stmts:
2917 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2918 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2919 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2920 S5' e_T = c_T | d_T;
2921 S6' f_T = e_T;
2923 Instead of the above S3' we could emit:
2924 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2925 S3' c_T = a_T & b_T;
2926 but the above is more efficient. */
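/* A condensed scalar sketch of the S1-S6 example above for TYPE = int
   (hypothetical values, not part of this file):

     int f_T = (int) ((x1 < y1) & (x2 < y2) | (x3 < y3));

   is rewritten as if it were

     int a_T = x1 < y1 ? 1 : 0;        // S1'
     int c_T = x2 < y2 ? a_T : 0;      // S3', reusing a_T
     int d_T = x3 < y3 ? 1 : 0;        // S4'
     int e_T = c_T | d_T;              // S5'
     f_T = e_T;                        // S6'  */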
2928 static gimple
2929 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2930 tree *type_out)
2932 gimple last_stmt = stmts->pop ();
2933 enum tree_code rhs_code;
2934 tree var, lhs, rhs, vectype;
2935 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2936 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2937 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2938 gimple pattern_stmt;
2940 if (!is_gimple_assign (last_stmt))
2941 return NULL;
2943 var = gimple_assign_rhs1 (last_stmt);
2944 lhs = gimple_assign_lhs (last_stmt);
2946 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2947 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2948 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2949 return NULL;
2951 rhs_code = gimple_assign_rhs_code (last_stmt);
2952 if (CONVERT_EXPR_CODE_P (rhs_code))
2954 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2955 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2956 return NULL;
2957 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2958 if (vectype == NULL_TREE)
2959 return NULL;
2961 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2962 return NULL;
2964 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2965 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2966 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2967 pattern_stmt
2968 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2969 else
2970 pattern_stmt
2971 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2972 *type_out = vectype;
2973 *type_in = vectype;
2974 stmts->safe_push (last_stmt);
2975 if (dump_enabled_p ())
2976 dump_printf_loc (MSG_NOTE, vect_location,
2977 "vect_recog_bool_pattern: detected:\n");
2979 return pattern_stmt;
2981 else if (rhs_code == COND_EXPR
2982 && TREE_CODE (var) == SSA_NAME)
2984 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2985 if (vectype == NULL_TREE)
2986 return NULL;
2988 /* Build a scalar type for the boolean result that when
2989 vectorized matches the vector type of the result in
2990 size and number of elements. */
2991 unsigned prec
2992 = wi::udiv_trunc (TYPE_SIZE (vectype),
2993 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
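/* E.g. for a V4SImode VECTYPE this is 128 / 4, i.e. a 32-bit
   scalar type.  */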
2994 tree type
2995 = build_nonstandard_integer_type (prec,
2996 TYPE_UNSIGNED (TREE_TYPE (var)));
2997 if (get_vectype_for_scalar_type (type) == NULL_TREE)
2998 return NULL;
3000 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3001 return NULL;
3003 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3004 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3005 pattern_stmt
3006 = gimple_build_assign_with_ops (COND_EXPR, lhs,
3007 build2 (NE_EXPR, boolean_type_node,
3008 rhs, build_int_cst (type, 0)),
3009 gimple_assign_rhs2 (last_stmt),
3010 gimple_assign_rhs3 (last_stmt));
3011 *type_out = vectype;
3012 *type_in = vectype;
3013 stmts->safe_push (last_stmt);
3014 if (dump_enabled_p ())
3015 dump_printf_loc (MSG_NOTE, vect_location,
3016 "vect_recog_bool_pattern: detected:\n");
3018 return pattern_stmt;
3020 else if (rhs_code == SSA_NAME
3021 && STMT_VINFO_DATA_REF (stmt_vinfo))
3023 stmt_vec_info pattern_stmt_info;
3024 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3025 gcc_assert (vectype != NULL_TREE);
3026 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3027 return NULL;
3028 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3029 return NULL;
3031 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3032 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3033 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3035 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3036 gimple cast_stmt
3037 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
3038 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3039 rhs = rhs2;
3041 pattern_stmt
3042 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3043 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3044 bb_vinfo);
3045 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3046 STMT_VINFO_DATA_REF (pattern_stmt_info)
3047 = STMT_VINFO_DATA_REF (stmt_vinfo);
3048 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3049 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3050 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3051 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3052 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3053 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3054 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3055 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3056 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3057 *type_out = vectype;
3058 *type_in = vectype;
3059 stmts->safe_push (last_stmt);
3060 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_NOTE, vect_location,
3062 "vect_recog_bool_pattern: detected:\n");
3063 return pattern_stmt;
3065 else
3066 return NULL;
3070 /* Mark statements that are involved in a pattern. */
3072 static inline void
3073 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3074 tree pattern_vectype)
3076 stmt_vec_info pattern_stmt_info, def_stmt_info;
3077 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3078 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3079 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3080 gimple def_stmt;
3082 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3083 if (pattern_stmt_info == NULL)
3085 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3086 bb_vinfo);
3087 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3089 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3091 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3092 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3093 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3094 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3095 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3096 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3097 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3098 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3099 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3101 gimple_stmt_iterator si;
3102 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3103 !gsi_end_p (si); gsi_next (&si))
3105 def_stmt = gsi_stmt (si);
3106 def_stmt_info = vinfo_for_stmt (def_stmt);
3107 if (def_stmt_info == NULL)
3109 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3110 bb_vinfo);
3111 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3113 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3114 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3115 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3116 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3117 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3122 /* Function vect_pattern_recog_1
3124 Input:
3125 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3126 computation pattern.
3127 STMT: A stmt from which the pattern search should start.
3129 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3130 expression that computes the same functionality and can be used to
3131 replace the sequence of stmts that are involved in the pattern.
3133 Output:
3134 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3135 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3136 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3137 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3138 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3139 the available target pattern.
3141 This function also does some bookkeeping, as explained in the documentation
3142 for vect_pattern_recog. */
3144 static void
3145 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3146 gimple_stmt_iterator si,
3147 vec<gimple> *stmts_to_replace)
3149 gimple stmt = gsi_stmt (si), pattern_stmt;
3150 stmt_vec_info stmt_info;
3151 loop_vec_info loop_vinfo;
3152 tree pattern_vectype;
3153 tree type_in, type_out;
3154 enum tree_code code;
3155 int i;
3156 gimple next;
3158 stmts_to_replace->truncate (0);
3159 stmts_to_replace->quick_push (stmt);
3160 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3161 if (!pattern_stmt)
3162 return;
3164 stmt = stmts_to_replace->last ();
3165 stmt_info = vinfo_for_stmt (stmt);
3166 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3168 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3170 /* No need to check target support (already checked by the pattern
3171 recognition function). */
3172 pattern_vectype = type_out ? type_out : type_in;
3174 else
3176 enum machine_mode vec_mode;
3177 enum insn_code icode;
3178 optab optab;
3180 /* Check target support */
3181 type_in = get_vectype_for_scalar_type (type_in);
3182 if (!type_in)
3183 return;
3184 if (type_out)
3185 type_out = get_vectype_for_scalar_type (type_out);
3186 else
3187 type_out = type_in;
3188 if (!type_out)
3189 return;
3190 pattern_vectype = type_out;
3192 if (is_gimple_assign (pattern_stmt))
3193 code = gimple_assign_rhs_code (pattern_stmt);
3194 else
3196 gcc_assert (is_gimple_call (pattern_stmt));
3197 code = CALL_EXPR;
3200 optab = optab_for_tree_code (code, type_in, optab_default);
3201 vec_mode = TYPE_MODE (type_in);
3202 if (!optab
3203 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3204 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3205 return;
3208 /* Found a vectorizable pattern. */
3209 if (dump_enabled_p ())
3211 dump_printf_loc (MSG_NOTE, vect_location,
3212 "pattern recognized: ");
3213 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3214 dump_printf (MSG_NOTE, "\n");
3217 /* Mark the stmts that are involved in the pattern. */
3218 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3220 /* Patterns cannot be vectorized using SLP, because they change the order of
3221 computation. */
3222 if (loop_vinfo)
3223 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3224 if (next == stmt)
3225 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3227 /* It is possible that additional pattern stmts are created and inserted in
3228 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3229 relevant statements. */
3230 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3231 && (unsigned) i < (stmts_to_replace->length () - 1);
3232 i++)
3234 stmt_info = vinfo_for_stmt (stmt);
3235 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3236 if (dump_enabled_p ())
3238 dump_printf_loc (MSG_NOTE, vect_location,
3239 "additional pattern stmt: ");
3240 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3241 dump_printf (MSG_NOTE, "\n");
3244 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3249 /* Function vect_pattern_recog
3251 Input:
3252 LOOP_VINFO - the loop_vec_info of a loop in which we want to look for
3253 computation idioms.
3255 Output - for each computation idiom that is detected we create a new stmt
3256 that provides the same functionality and that can be vectorized. We
3257 also record some information in the stmt_vec_info of the relevant
3258 stmts, as explained below:
3260 At the entry to this function we have the following stmts, with the
3261 following initial value in the STMT_VINFO fields:
3263 stmt in_pattern_p related_stmt vec_stmt
3264 S1: a_i = .... - - -
3265 S2: a_2 = ..use(a_i).. - - -
3266 S3: a_1 = ..use(a_2).. - - -
3267 S4: a_0 = ..use(a_1).. - - -
3268 S5: ... = ..use(a_0).. - - -
3270 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3271 represented by a single stmt. We then:
3272 - create a new stmt S6 equivalent to the pattern (the stmt is not
3273 inserted into the code)
3274 - fill in the STMT_VINFO fields as follows:
3276 in_pattern_p related_stmt vec_stmt
3277 S1: a_i = .... - - -
3278 S2: a_2 = ..use(a_i).. - - -
3279 S3: a_1 = ..use(a_2).. - - -
3280 S4: a_0 = ..use(a_1).. true S6 -
3281 '---> S6: a_new = .... - S4 -
3282 S5: ... = ..use(a_0).. - - -
3284 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3285 to each other through the RELATED_STMT field).
3287 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3288 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3289 remain irrelevant unless used by stmts other than S4.
3291 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3292 (because they are marked as irrelevant). It will vectorize S6, and record
3293 a pointer to the new vector stmt VS6 from S6 (as usual).
3294 S4 will be skipped, and S5 will be vectorized as usual:
3296 in_pattern_p related_stmt vec_stmt
3297 S1: a_i = .... - - -
3298 S2: a_2 = ..use(a_i).. - - -
3299 S3: a_1 = ..use(a_2).. - - -
3300 > VS6: va_new = .... - - -
3301 S4: a_0 = ..use(a_1).. true S6 VS6
3302 '---> S6: a_new = .... - S4 VS6
3303 > VS5: ... = ..vuse(va_new).. - - -
3304 S5: ... = ..use(a_0).. - - -
3306 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3307 elsewhere), and we'll end up with:
3309 VS6: va_new = ....
3310 VS5: ... = ..vuse(va_new)..
3312 In case of more than one pattern statement, e.g., widen-mult with
3313 intermediate type:
3315 S1 a_t = ;
3316 S2 a_T = (TYPE) a_t;
3317 '--> S3: a_it = (interm_type) a_t;
3318 S4 prod_T = a_T * CONST;
3319 '--> S5: prod_T' = a_it w* CONST;
3321 there may be other users of a_T outside the pattern. In that case S2 will
3322 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3323 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3324 be recorded in S3. */
3326 void
3327 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3329 struct loop *loop;
3330 basic_block *bbs;
3331 unsigned int nbbs;
3332 gimple_stmt_iterator si;
3333 unsigned int i, j;
3334 vect_recog_func_ptr vect_recog_func;
3335 auto_vec<gimple, 1> stmts_to_replace;
3336 gimple stmt;
3338 if (dump_enabled_p ())
3339 dump_printf_loc (MSG_NOTE, vect_location,
3340 "=== vect_pattern_recog ===\n");
3342 if (loop_vinfo)
3344 loop = LOOP_VINFO_LOOP (loop_vinfo);
3345 bbs = LOOP_VINFO_BBS (loop_vinfo);
3346 nbbs = loop->num_nodes;
3348 else
3350 bbs = &BB_VINFO_BB (bb_vinfo);
3351 nbbs = 1;
3354 /* Scan through the loop stmts, applying the pattern recognition
3355 functions starting at each stmt visited: */
3356 for (i = 0; i < nbbs; i++)
3358 basic_block bb = bbs[i];
3359 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3361 if (bb_vinfo && (stmt = gsi_stmt (si))
3362 && vinfo_for_stmt (stmt)
3363 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3364 continue;
3366 /* Scan over all generic vect_recog_xxx_pattern functions. */
3367 for (j = 0; j < NUM_PATTERNS; j++)
3369 vect_recog_func = vect_vect_recog_func_ptrs[j];
3370 vect_pattern_recog_1 (vect_recog_func, si,
3371 &stmts_to_replace);